The problem is that I am trying to perform a hyper-parameter sweep using wandb: the first sweep run trains for the set number of epochs, but the subsequent runs each train for only 1 epoch. As evidence, I attach an image in which you can see a drastic decrease in runtime as the sweep progresses.
Here is my code for performing the sweep:
wandb.login()

# Build a project name from the sweep config and register the sweep
NAME = sweep_config['parameters']['model_name']['value'] + \
       f"__var-{sweep_config['parameters']['num_classes']['value']}" + \
       f"__fold-{sweep_config['parameters']['fold']['value']}"
print('NAME : ', NAME, '\n\n')

sweep_id = wandb.sweep(sweep_config, project=NAME)

def tune_hyperparams(config=None):
    # Objective function executed by the sweep agent once per trial
    with wandb.init(config=config):
        config = wandb.config
        print(config, '\n\n\n')

        num_workers = 8
        tr_loader = DataLoader(tr_dataset, batch_size=config['BATCH_SIZE'], shuffle=True, num_workers=num_workers)
        val_loader = DataLoader(val_dataset, batch_size=config['BATCH_SIZE'], shuffle=False, num_workers=num_workers)

        # Build the model from the sampled hyper-parameters
        model_obj = DenseNet(densenet_variant=config['model_size'], in_channels=config['in_channels'],
                             num_classes=config['num_classes'], compression_factor=0.3, k=32, config=config)
        model = Classifier(model_obj)

        run_name = f"lr_{config['lr']} *** bs{config['BATCH_SIZE']} *** decay_{config['weight_decay']}"
        wandb_logger = WandbLogger(project=NAME, name=run_name)

        trainer = Trainer(callbacks=[early_stop_callback, rich_progress_bar],
                          accelerator='gpu', max_epochs=config['epochs'], logger=[wandb_logger],
                          devices=find_usable_cuda_devices(1))
        trainer.fit(model, tr_loader, val_loader)
        wandb.finish()

wandb.agent(sweep_id, tune_hyperparams, count=30)
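For context, the objects not shown above (the imports, sweep_config, early_stop_callback, rich_progress_bar) are defined earlier in my script; DenseNet, Classifier, tr_dataset and val_dataset are my own model and dataset classes. The sketch below is only a simplified stand-in with placeholder values (assuming Lightning 2.x), not my exact code:

# Hypothetical sketch of the surrounding setup -- placeholder values, not the real config
import wandb
from torch.utils.data import DataLoader
from lightning.pytorch import Trainer
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch.callbacks import EarlyStopping, RichProgressBar
from lightning.pytorch.accelerators import find_usable_cuda_devices

# Callbacks are created once at module level and passed to every trial's Trainer
early_stop_callback = EarlyStopping(monitor='val_loss', patience=3, mode='min')
rich_progress_bar = RichProgressBar()

# Example sweep configuration (structure only; values are placeholders)
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'parameters': {
        'model_name':   {'value': 'densenet'},
        'model_size':   {'value': 121},
        'in_channels':  {'value': 3},
        'num_classes':  {'value': 7},
        'fold':         {'value': 0},
        'epochs':       {'value': 50},
        'BATCH_SIZE':   {'values': [16, 32]},
        'lr':           {'values': [1e-3, 1e-4]},
        'weight_decay': {'values': [0.0, 1e-4]},
    },
}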
Please tell me how to tackle this problem.
Thanks in advance.