Hello everyone,
I am new to W&B, so this might be a beginner question. When I run the following, the artifact is logged correctly without any issues:

```python
wandb.log_artifact(file_path, name='dataset', type='dataset')
```
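For context, this is roughly how I call it in my script (a minimal sketch; the project name and file path are placeholders):

```python
import wandb

# start a run and log a local file as a dataset artifact
run = wandb.init(project="project", job_type="load-data")
wandb.log_artifact("data/dataset.json", name="dataset", type="dataset")
run.finish()
```

Logged this way, the dataset shows up under the run's Artifacts and gets versions (v0, v1, …) as I expect.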
However, if I use the example provided here:
```python
def load_and_log():
    # 🚀 start a run, with a type to label it and a project it can call home
    with wandb.init(project="artifacts-example", job_type="load-data") as run:
        datasets = load()  # separate code for loading the datasets
        names = ["training", "validation", "test"]

        # 🏺 create our Artifact
        raw_data = wandb.Artifact(
            "mnist-raw", type="dataset",
            description="Raw MNIST dataset, split into train/val/test",
            metadata={"source": "torchvision.datasets.MNIST",
                      "sizes": [len(dataset) for dataset in datasets]})

        for name, data in zip(names, datasets):
            # 🐣 Store a new file in the artifact, and write something into its contents.
            with raw_data.new_file(name + ".pt", mode="wb") as file:
                x, y = data.tensors
                torch.save((x, y), file)

        # ✍️ Save the artifact to W&B.
        run.log_artifact(raw_data)

load_and_log()
```
With that approach, the artifacts end up stored in a run table, which makes versioning impossible.
Am I doing something wrong? Below is the same function as I adapted it for my project, in case I missed something:
```python
import wandb


def load_and_log():
    # 🚀 start a run, with a type to label it and a project it can call home
    with wandb.init(project="project", job_type="load-data", resume="allow") as run:
        dataset = my_function(dir_path + '/datas', MAX_SAMPLES, MAX_LENGTH)  # returns a tuple of lists
        datasets = dataset.load()  # separate code for loading the datasets
        names = ["questions", "answers"]

        # 🏺 create our Artifact
        raw_data = wandb.Artifact(
            "dataset", type="dataset",
            description="json of the preprocessed dataset - not split",
            metadata={"source": "https://source.php",
                      "sizes": [len(dataset) for dataset in datasets]})

        # transfer lists into table
        table = wandb.Table(columns=[], data=[])
        for name, dataset in zip(names, datasets):
            table.add_column(name=f"{name}", data=dataset)

        # ✍️ Save the artifact to W&B.
        wandb.log({f"dataset_{MAX_SAMPLES}_{MAX_LENGTH}": table})

load_and_log()
```
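Reading the example again, my guess is that I should be attaching the table to the Artifact and logging the Artifact with run.log_artifact(), instead of passing the table to wandb.log(). A minimal sketch of what I mean (the names and dummy data are placeholders, and I'm not sure this is the intended pattern):

```python
import wandb

with wandb.init(project="project", job_type="load-data") as run:
    # build the table as before (dummy data just for the sketch)
    table = wandb.Table(columns=["questions", "answers"],
                        data=[["q1", "a1"], ["q2", "a2"]])

    # attach the table to an artifact and log the artifact,
    # hoping W&B then versions it as dataset:v0, dataset:v1, ...
    raw_data = wandb.Artifact("dataset", type="dataset")
    raw_data.add(table, "preprocessed_data")
    run.log_artifact(raw_data)
```

Is that the pattern the tutorial intends, or should wandb.log() with a table also give me a versioned artifact?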
Thank you in advance to anyone who has an answer!