Hello everyone,
I am relatively new to using wandb and I’m encountering some issues with logging and then immediately using artifacts within the same run. My goal is to log artifacts and then use them right away, unless there’s a reason I should avoid doing this.
Here’s what I do:
- Initialize the run using setup_wandb.
- Log the data with log_data() and immediately download it again in train_classifier(). This part works, but the data only appears as an output, not as an input, under Artifacts in wandb.
- Log the model after training in train_classifier() and download it again for testing in test(). This does not work, even though the model shows up under Files on wandb once the run is finished and the status after uploading is COMMITTED. What am I missing? (A minimal sketch of the pattern I am aiming for follows this list.)
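For context, this is roughly the pattern I am trying to follow, shown here as a stripped-down sketch with made-up project, artifact, and file names rather than my actual code:

import wandb

run = wandb.init(project="my-project", job_type="example")  # placeholder project

# Log a dataset artifact and wait until the upload is committed
dataset = wandb.Artifact("my-dataset", type="dataset")
dataset.add_file("train.csv")  # placeholder file
run.log_artifact(dataset).wait()

# Consume the same artifact later in the same run
used = run.use_artifact("my-dataset:latest")
data_dir = used.download()
print("Downloaded to:", data_dir)

run.finish()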
Here are the relevant code snippets:
def setup_wandb(self, sweep_id, is_sweep):
    run = wandb.init(entity=WANDB_ENTITY,
                     project=PROJECT_NAME,
                     id=run_id,
                     config=json.load(file),
                     notes=f'{self.classifier_name} {self.dataset_name} {self.phoneme_recognizer_name} {self.representation}-grams idf:{self.use_idf}',
                     dir=run_folder,
                     resume="allow",
                     job_type="sweep" if is_sweep else "run")
    print(f"Run initialized with ID: {run_id} in dir: {run_folder}")
    run.name = f"{run_id}"
def log_data(self):
    paths = {
        "train": self.get_split_path('train'),
        "valid": self.get_split_path('valid'),
        "test": self.get_split_path('test')
    }
    data_artifact = wandb.Artifact(
        f'{self.phoneme_recognizer_name}-{self.dataset_name}-dataset',  # Artifact's name
        type="dataset",
        description=f"Preprocessed dataset for {self.phoneme_recognizer_name} {self.dataset_name}, split into train/valid/test",
        metadata={"sizes": {name: os.path.getsize(path) for name, path in paths.items()}}
    )
    for name, path in paths.items():
        data_artifact.add_file(path, name=f"{name}.csv")
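    # .wait() blocks until the artifact upload is committed, so it can be consumed later in the same run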
    data_artifact_result = wandb.run.log_artifact(data_artifact).wait()
    print(f"data_artifact status: {data_artifact_result.state}")
def train_classifier(self):
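    # Re-fetch the dataset artifact that was just logged so this run records it as an input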
    artifact_data = wandb.run.use_artifact(f'{self.phoneme_recognizer_name}-{self.dataset_name}-dataset:latest')
    artifact_data_dir = artifact_data.download()
    print('artifact_data_dir:', artifact_data_dir)
    train_path = os.path.join(artifact_data_dir, 'train.csv')
    valid_path = os.path.join(artifact_data_dir, 'valid.csv')
    df_train = pd.read_csv(train_path)
    df_valid = pd.read_csv(valid_path)
    # Logging Model to wandb
    model_artifact = wandb.Artifact(
        'trained_model',  # Artifact's name
        type='model',  # Artifact's type
        description=f'Trained {self.classifier_name} model on {self.phoneme_recognizer_name} transcriptions and {self.dataset_name} dataset'
    )
    with model_artifact.new_file('trained_pipeline.pkl', mode='wb') as file:
        dill.dump(pipeline, file)
    model_artifact_result = wandb.run.log_artifact(model_artifact).wait()
    print(f"model_artifact status: {model_artifact_result.state}")
def test(self):
    data_artifact = wandb.run.use_artifact(f'{self.phoneme_recognizer_name}-{self.dataset_name}-dataset:latest')
    data_artifact_dir = data_artifact.download()
    test_path = os.path.join(data_artifact_dir, 'test.csv')
    df_test = pd.read_csv(test_path)
    model_artifact = wandb.run.use_artifact(f'{WANDB_ENTITY}/{PROJECT_NAME}/trained-model:latest', type='model')
    model_artifact_dir = model_artifact.download()
    model_file_path = os.path.join(model_artifact_dir, 'trained_pipeline.pkl')
    try:
        pipeline = dill.load(open(model_file_path, 'rb'))
        print("Model successfully loaded.")
    except Exception as e:
        print(f"Failed to load model: {e}")
Here is the error message I receive:
model_artifact status: COMMITTED
wandb: 3 of 3 files downloaded.
wandb: ERROR Unable to fetch artifact with name <WANDB_ENTITY>/<PROJECT_NAME>/trained-model:latest
Traceback (most recent call last):
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/apis/normalize.py", line 41, in wrapper
return func(*args, **kwargs)
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/apis/public/api.py", line 958, in artifact
artifact = wandb.Artifact._from_name(
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/sdk/artifacts/artifact.py", line 263, in _from_name
raise ValueError(
ValueError: Unable to fetch artifact with name <WANDB_ENTITY>/<PROJECT_NAME>/trained-model:latest
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/.../.../<myscript>.py", line 64, in <module>
classifier.test()
File "/Users/.../.../<myscript>.py", line 330, in test
model_artifact = wandb.run.use_artifact(f'{WANDB_ENTITY}/{PROJECT_NAME}/trained-model:latest', type='model')
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 371, in wrapper_fn
return func(self, *args, **kwargs)
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 361, in wrapper
return func(self, *args, **kwargs)
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/sdk/wandb_run.py", line 2859, in use_artifact
artifact = public_api.artifact(type=type, name=name)
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/apis/normalize.py", line 87, in wrapper
raise CommError(message, err).with_traceback(sys.exc_info()[2])
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/apis/normalize.py", line 41, in wrapper
return func(*args, **kwargs)
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/apis/public/api.py", line 958, in artifact
artifact = wandb.Artifact._from_name(
File "/Library/anaconda3/envs/MLOps-Dialects/lib/python3.9/site-packages/wandb/sdk/artifacts/artifact.py", line 263, in _from_name
raise ValueError(
wandb.errors.CommError: Unable to fetch artifact with name XXX/XXX-Test/trained-model:latest