Hello, I am running a hyperparameter optimization and want to log the results with wandb. Unfortunately, the metric names of the individual trials get changed, so I cannot easily view all the runs together. The metrics of the first trial all have the right names, but for each trial after that the metric names get a '_1' appended, then a '_2' for the third trial, and so on.
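For example, the logged keys end up looking roughly like this in the dashboard (just to show the pattern, the exact prefix depends on the logger):

trial 0: val_loss, val_f1_score
trial 1: val_loss_1, val_f1_score_1
trial 2: val_loss_2, val_f1_score_2

So every trial gets its own set of charts instead of all trials being overlaid in one chart per metric.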
This is the code I use:
def train_model(model, train_ds, val_ds, save_path, folder, epochs=100, patience=20, cbs=[], hps=False, verbose=1):
'''
This function trains the model on the given datasets. It uses the Keras API.
Parameters:
model: keras.Model, the model to train
train_ds: tf.data.Dataset, training dataset
val_ds: tf.data.Dataset, validation dataset
save_path: str, path to the folder where the model should be saved
folder: str, name of a subfolder inside save_path where the model should be saved; folder gets created; for example the date and time of training
epochs: int, number of epochs to train the model
patience: int, number of epochs to wait before early stopping
cbs: list, list of additional callbacks to use during training, in addition to the early stopping and model checkpoint
hps: bool, whether this function is used for hyperparameter search or normal training
verbose: int, whether to show information during each epoch of training
Returns:
for hps=False:
model: keras.Model, the trained model associated with the best validation f1 score
history: dict, history of the training process, contains the loss and metrics for each epoch
for hps=True:
best_val_f1: float, the best f1 score on the validation set
'''
model_name = model.name
checkpoints = os.path.join(save_path, folder,model_name)
if not os.path.exists(os.path.join(save_path, folder)):
os.makedirs(os.path.join(save_path, folder), exist_ok=True)
print('savedir:',checkpoints)
callbacks = []
callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=patience, verbose=1, mode='max',))
callbacks.append(tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, mode='min', min_delta=0.001, cooldown=0, min_lr=0))
callbacks.append(tf.keras.callbacks.ModelCheckpoint(filepath=checkpoints+'/best_model', monitor='val_f1_score', save_best_only=True, save_weights_only=False, mode='max',))
for cb in cbs:
callbacks.append(cb)
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs, callbacks=callbacks, verbose=verbose)
print('saved model to:',checkpoints)
#load the best model
model = load_model(checkpoints+'/best_model', custom_objects={'f1_score': tfa.metrics.F1Score})
#save model as h5 file
model.save(checkpoints+'/best_model.h5')
if hps:
best_val_f1 = max(history.history['val_f1_score'])
return best_val_f1
else:
return model, history
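For context, a normal (non-HPS) training run is started roughly like this (model, datasets and paths are placeholders from my own pipeline):

#hypothetical example call, just to show how train_model is used
best_model, history = train_model(my_model, train_ds, val_ds,
                                  save_path='./runs',
                                  folder=datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
                                  epochs=100, patience=20, hps=False, verbose=1)
print('best val f1:', max(history.history['val_f1_score']))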
def find_best_model(train_ds, val_ds, test_ds, save_path, classification_type='multiclass', models='all', model_sizes=['s'], max_epochs=60, eval_metric='f1',iterations=100):
'''
This function performs a hyperparameter search to find the best model for the given dataset. It uses the Keras API.
It uses the Optuna library for the hyperparameter search.
It performs the following operations:
- defines an objective function for the Hyperparameter search
- defines the search algorithm and pruner for the Hyperparameter search (based on optuna)
- selects a set of hyperparameters based on the trial sampler
- builds a model with the selected hyperparameters
- trains the model on the training dataset
- evaluates the model on the validation dataset
- compares the model with the other models based on the evaluation metric
- bad models are pruned (the training is stopped early if the model is not performing well)
- it repeats this process for a given number of iterations
- the performances are logged with Weights & Biases and can be viewed in the dashboard
Parameters:
train_ds: tf.data.Dataset, training dataset
val_ds: tf.data.Dataset, validation dataset
test_ds: tf.data.Dataset, test dataset
save_path: str, path to the folder where the best model should be saved
classification_type: str, type of classification; should be 'multiclass' or 'multilabel'; multiclass is used when only one class can be present in the image,
multilabel is used when multiple classes can be present in the image
models: list/str, list of models to search for; should be one or a combination of the following: 'resnet', 'vgg', 'inceptionv3', 'mobilenetv2', 'xception','efficientnet'; if 'all' is used, all models are searched for
model_sizes: list, list of sizes for the models; should be one or a combination of the following: 's', 'm', 'l'
eval_metric: str, evaluation metric to use for the Hyperparameter search; should be one of the following: 'accuracy', 'precision', 'recall', 'f1'
iterations: int, number of iterations for the Hyperparameter search (should be at least 20)
The Hyperparameters that are optimized are the following:
- model_type: the model architecture to use
- model_size: the size of the model (small, medium, large)
- learning_rate: the learning rate for the optimizer (one of the most important hyperparameters)
- optimizer: the optimizer to use for the model
- classifier_layers: the number of (fully-connected)-layers in the classifier(or head) of the model
- classifier_units: the number of units in each layer of the classifier
- dropout: the dropout rate for the classifier (used to prevent overfitting)
- l2_reg: the l2 regularization parameter for the model (used to prevent overfitting)
- use_weights: whether to use imagenet weights for the model
- trainable_layers: the number of layers to make trainable in the pretrained model(only used for transfer learning, if using imagenet weights)
'''
assert isinstance(train_ds, tf.data.Dataset), 'train_ds should be a tf.data.Dataset object'
assert isinstance(val_ds, tf.data.Dataset), 'val_ds should be a tf.data.Dataset object'
if test_ds:
assert isinstance(test_ds, tf.data.Dataset), 'test_ds should be a tf.data.Dataset object'
assert classification_type in ['multiclass', 'multilabel'], 'classification_type should be one of the following: multiclass, multilabel'
assert eval_metric in ['accuracy', 'precision', 'recall', 'f1'], 'eval_metric should be one of the following: accuracy, precision, recall, f1'
assert iterations >= 20, 'iterations should be at least 20'
assert isinstance(models, list) or models == 'all', 'models should be a list or all'
assert models == 'all' or all([model in ['resnet', 'vgg', 'inceptionv3', 'mobilenetv2', 'xception','efficientnet'] for model in models]), 'models should be one or a combination of the following: resnet, vgg, inceptionv3, mobilenetv2, xception, efficientnet'
assert all([model_size in ['s', 'm', 'l'] for model_size in model_sizes]), 'model_sizes should be one or a combination of the following: s, m, l'
def HPO_objective(trial):
'''
Objective function for the Hyperparameter search. It is used to find the best hyperparameters for the model.
We define it inside the other function so we can use the dataset from the outer function without passing it as an argument.
It performs the following operations:
- selects a set of hyperparameters based on the trial sampler
- builds a model with the selected hyperparameters
- trains the model on the training dataset
- evaluates the model on the validation dataset
- returns the evaluation metric
The Hyperparameters are the following:
- model_type: the model architecture to use
- model_size: the size of the model (small, medium, large)
- learning_rate: the learning rate for the optimizer (one of the most important hyperparameters)
- optimizer: the optimizer to use for the model
- classifier_layers: the number of (fully-connected)-layers in the classifier(or head) of the model
- classifier_units: the number of units in each layer of the classifier
- dropout: the dropout rate for the classifier (used to prevent overfitting)
- l2_reg: the l2 regularization parameter for the model (used to prevent overfitting)
- use_weights: whether to use imagenet weights for the model
- trainable_layers: the number of layers to make trainable in the pretrained model(only used for transfer learning, if using imagenet weights)
'''
#define the hyperparameters to search for
model_types = ['resnet', 'inceptionv3', 'mobilenetv2', 'xception','efficientnet'] if models == 'all' else models
model_type = trial.suggest_categorical('model', model_types)
model_size = trial.suggest_categorical('model_size', model_sizes)
learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
optimizer = trial.suggest_categorical('optimizer', ['adam', 'sgd', 'lamb'])
classifier_layers = trial.suggest_int('classifier_layers', 1, 3)
classifier_units = []
for i in range(classifier_layers):
classifier_units.append(trial.suggest_int(f'classifier_units_{i}', 32, 1024))
dropout = trial.suggest_float('dropout', 0.1, 0.5)
l2_reg = trial.suggest_float('l2_reg', 1e-5, 1e-1, log=True)
use_weights = trial.suggest_categorical('use_weights', ['imagenet', None])
if use_weights == 'imagenet':
trainable_layers = trial.suggest_int('trainable_layers', 0, 5)
else:
trainable_layers = 'all'
#set up wandb for logging
hyperparameters = {'model':model_type,
'model_size':model_size,
'learning_rate':learning_rate,
'optimizer':optimizer,
'classifier_layers':classifier_layers,
'classifier_units':classifier_units,
'dropout':dropout,
'l2_reg':l2_reg,
'use_weights':use_weights,
'trainable_layers':trainable_layers,
'classification_type':classification_type}
if wandb.run is not None:
print(f'run {wandb.run.name} is running') #use wandb.run here, the local variable run is not assigned yet at this point
wandb.finish()
run = wandb.init(project='hyperparametersearch_results',anonymous="allow",reinit=False, magic=True, group=str(dt), job_type=model_type, config=hyperparameters,tags=['trial_'+str(trial.number), model_type, classification_type])
#run.name= f'trial{trial.number}_' + run.name
#run.display(height=720)
#read the input shape and number of classes from the dataset
input_shape = train_ds.element_spec[0].shape[1:]
n_classes = train_ds.element_spec[1].shape[-1]
print('input_shape:',input_shape)
if input_shape[0] < 75 or input_shape[1] < 75:
raise ValueError('input_shape too small, consider resizing the images to at least 75x75')
if input_shape[-1] != 3:
use_weights = None
print('n_classes:',n_classes)
model = build_model(input_shape,
n_classes,
model_type=model_type,
model_size=model_size,
weights=use_weights,
trainable_layers=trainable_layers,
classifier_neurons=classifier_units,
dropout=dropout,
l2_reg=l2_reg,
optimizer=optimizer,
lr=learning_rate,
classification_type=classification_type,
verbose=0)
callbacks = [WandbMetricsLogger(),KerasPruningCallback(trial, 'f1_score')]
model, _ = train_model(model,
train_ds,
val_ds,
save_path=save_path,
folder='trial'+ str(trial.number)+'_model',
epochs=max_epochs,
patience = 10,
cbs=callbacks,
verbose=2)
best_model_path = os.path.join(save_path,'trial'+ str(trial.number)+'_model',model.name,'best_model')
run.log_model(best_model_path+'.h5', name = 'best_model')
run.log({'best_model': best_model_path+'.h5'})
eval_ds = test_ds if test_ds is not None else val_ds #fall back to the validation set if no test set was given
metrics = model.evaluate(eval_ds, return_dict=True)
score = metrics['f1_score']
run.finish()
#wandb.finish()
return score
os.environ["WANDB_MODE"] = "online"
dt= datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.HyperbandPruner()) #maximize, because the objective returns the f1 score
study.optimize(HPO_objective, n_trials=iterations)
#the plot_* functions return plotly figures, call .show() to actually display them
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_intermediate_values(study).show()
optuna.visualization.plot_parallel_coordinate(study).show()
optuna.visualization.plot_param_importances(study).show()
pruned_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.COMPLETE])
print("Number of finished trials: {}".format(len(study.trials)))
print(" Number of pruned trials: ", len(pruned_trials))
print(" Number of complete trials: ", len(complete_trials))
print("Best trial:")
trial = study.best_trial
print(" Value: {}".format(trial.value))
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))
def cross_validation(model, img_path, label_path, folds, img_size, n_classes, augment_strength=0.2, img_type='png', verbose=1):
assert os.path.exists(img_path), 'image_path does not exist'
assert os.path.exists(label_path), 'label_path does not exist'
assert len(img_size) == 3, 'img_size should have 3 dimensions, last dimension contains image channels'
assert img_type in ['png', 'jpeg'], 'img_type should be one of the following: png, jpeg'
assert isinstance(model, str) or isinstance(model, tf.keras.Model), 'model should be a path to a saved model or a tf.keras.Model object'
#define a function to read the images from image filepaths
def parse_images(image_path, label):
image = tf.io.read_file(image_path)
if img_type == 'png':
image = tf.image.decode_png(image, channels=img_size[2])
elif img_type == 'jpeg':
image = tf.image.decode_jpeg(image, channels=img_size[2])
image = tf.image.resize(image, (img_size[0],img_size[1]),method='area')
return image, label
labels = pd.read_csv(label_path)
img_files = labels.iloc[:,0].values
img_files = img_path + os.sep + img_files #build the whole path for each image; os.sep keeps it portable across operating systems
labels = labels.drop(labels.columns[0], axis=1)
class_names = labels.columns
labels = labels.to_numpy()
assert len(img_files) == len(labels), 'number of images and labels do not match'
#collect one example image per class (one pass over the labels is enough)
image_of_each_class = {}
for i in range(len(labels)):
    label = np.argmax(labels[i])
    if label not in image_of_each_class:
        image_of_each_class[label] = img_files[i]
dataset = tf.data.Dataset.from_tensor_slices((img_files,labels))#construct tf.data.Dataset object
dataset = dataset.shuffle(len(img_files), reshuffle_each_iteration=False,seed=42)
#split the dataset into folds; Dataset.shard(n, i) keeps every n-th element, so each fold is itself a tf.data.Dataset
n_folds = folds
folds = []
for i in range(n_folds):
    fold = dataset.shard(num_shards=n_folds, index=i)
    fold = fold.map(parse_images, num_parallel_calls=tf.data.AUTOTUNE) #here we read the images from the file; happens during training for reduced memory usage and faster training
    fold = fold.batch(1)
    fold = fold.prefetch(buffer_size=tf.data.AUTOTUNE)
    folds.append(fold)
#load model if path is given
if isinstance(model, str):
model = load_model(model,custom_objects={'f1_score': tfa.metrics.F1Score})
#perform cross validation
fold_metrics = []
for i, fold in enumerate(folds):
print('fold:',i)
train_folds = folds[:i] + folds[i+1:]
train_ds = train_folds[0]
for j in range(1,len(train_folds)):
train_ds = train_ds.concatenate(train_folds[j])
val_ds = fold
model, history = train_model(model, train_ds, val_ds, save_path='.', folder='cross_validation', epochs=60, patience = 20, hps=False, verbose=1)
metrics = model.evaluate(val_ds, return_dict=True)
fold_metrics.append(metrics)
result_df = pd.DataFrame(fold_metrics)
print(result_df)
mean_metrics = result_df.mean(axis=0)
print('mean metrics:',mean_metrics)
return mean_metrics
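The cross validation is then called roughly like this (paths and numbers are placeholders):

#hypothetical example call of the cross validation
mean_metrics = cross_validation('path/to/best_model.h5',
                                img_path='data/images',
                                label_path='data/labels.csv',
                                folds=5,
                                img_size=(224, 224, 3),
                                n_classes=4,
                                img_type='png')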
Did I overlook something obvious? Any help would be appreciated.