I actually solved it now:
It seems that the custom optimizer I was passing to the trainer caused errors when a learning-rate value was generated at the start of each new trial, presumably because 'learning_rate' was being suggested a second time, with a different distribution, inside the Adafactor block (now commented out below). Once I took the optimizer back out, the following implementation worked and generated separate logs in my wandb dashboard:
import optuna
from optuna.integration import WeightsAndBiasesCallback
from transformers import (AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq,
                          Seq2SeqTrainer, Seq2SeqTrainingArguments)

# project-specific helpers (not shown here): get_train_data, get_validation_data,
# tokenize_train, WIKILARGE_PROCESSED

wandb_kwargs = {"project": "my-project"}
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, as_multirun=True)
@wandbc.track_in_wandb()
def objective(trial):
    training_args = Seq2SeqTrainingArguments(
        "tuning",
        num_train_epochs=1,
        # num_train_epochs=trial.suggest_categorical('num_epochs', [3, 5, 8]),
        per_device_eval_batch_size=3,
        per_device_train_batch_size=3,
        learning_rate=trial.suggest_float('learning_rate', low=0.00004, high=0.0001, log=False),  # no step: the original step of 0.0005 was larger than the whole search range
        # per_device_train_batch_size=trial.suggest_categorical('batch_size', [6, 8, 12, 18]),
        # per_device_eval_batch_size=trial.suggest_categorical('batch_size', [6, 8, 12, 18]),
        disable_tqdm=True,
        predict_with_generate=True,
        gradient_accumulation_steps=4,
        # gradient_checkpointing=True,
        # weight_decay=False,
        seed=12,
        warmup_steps=5,
        # evaluation and logging
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=1,
        logging_strategy="epoch",
        logging_steps=1,
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        # use_cache=False,
        push_to_hub=False,
        fp16=False,
        remove_unused_columns=True
    )
    # optimizer = Adafactor(
    #     t5dmodel.parameters(),
    #     lr=trial.suggest_float('learning_rate', low=4e-5, high=0.0001),  # ('learning_rate', 1e-6, 1e-3),
    #     # weight_decay=trial.suggest_float('weight_decay', WD_MIN, WD_CEIL),
    #     # lr=1e-3,
    #     eps=(1e-30, 1e-3),
    #     clip_threshold=1.0,
    #     decay_rate=-0.8,
    #     beta1=None,
    #     # weight_decay=False,
    #     weight_decay=0.1,
    #     relative_step=False,
    #     scale_parameter=False,
    #     warmup_init=False,
    # )
    # lr_scheduler = AdafactorSchedule(optimizer)
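    # NOTE: suggesting 'learning_rate' a second time in the Adafactor block above, with a
    # different distribution than the one in the TrainingArguments, is the most likely cause
    # of the per-trial errors described at the top, so the optimizer/scheduler pair stays out.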
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=t5dmodel)
    trainer = Seq2SeqTrainer(model=t5dmodel,
                             args=training_args,
                             train_dataset=tokenized_train_dataset['train'],
                             eval_dataset=tokenized_val_dataset['validation'],
                             data_collator=data_collator,
                             tokenizer=tokenizer,
                             # optimizers=(optimizer, lr_scheduler)
                             )
    trainer.train()
    scores = trainer.evaluate()
    return scores['eval_loss']
if __name__ == '__main__':
    t5dmodel = AutoModelForSeq2SeqLM.from_pretrained("yhavinga/t5-base-dutch", use_cache=False)
    tokenizer = AutoTokenizer.from_pretrained("yhavinga/t5-base-dutch", additional_special_tokens=None)
    features = {
        'WordRatioFeature': {'target_ratio': 0.8},
        'CharRatioFeature': {'target_ratio': 0.8},
        'LevenshteinRatioFeature': {'target_ratio': 0.8},
        'WordRankRatioFeature': {'target_ratio': 0.8},
        'DependencyTreeDepthRatioFeature': {'target_ratio': 0.8}
    }
    trainset_processed = get_train_data(WIKILARGE_PROCESSED, 0, 10)
    print(trainset_processed)
    valset_processed = get_validation_data(WIKILARGE_PROCESSED, 0, 7)
    print(valset_processed)
    tokenized_train_dataset = trainset_processed.map(tokenize_train, batched=True, batch_size=1)
    tokenized_val_dataset = valset_processed.map(tokenize_train, batched=True, batch_size=1)
    print('Triggering Optuna study')
    study = optuna.create_study(direction='minimize', pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=4, callbacks=[wandbc], gc_after_trial=True)
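For completeness: once the study has finished, the winning configuration can be read straight off the study object with plain Optuna API, e.g. directly after the study.optimize(...) call:

    print('Best trial:', study.best_trial.number)
    print('Best eval_loss:', study.best_value)
    print('Best hyperparameters:', study.best_params)

And if you want Adafactor back later, a less error-prone route than passing optimizers=(optimizer, lr_scheduler) is to let the Trainer construct the optimizer itself by setting optim="adafactor" in the Seq2SeqTrainingArguments (supported in recent transformers versions), so 'learning_rate' is only ever suggested once per trial.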