Hi!
I am using this notebook as a tutorial to log sweeps into my Dashboard.
Everything works very well besides the Metric, which loggs with the value Null (see this run for details)
My process is as follows:
Create a train function
def train_XGBRanker(config_defaults):
# π W&B Experiment
config_defaults.update(CONFIG)
run = wandb.init(project='AI4Code', name='xgbRanker', config=config_defaults)
config = wandb.config
# Initiate the model
model = XGBRanker(tree_method = config.tree_method,
booster=config.booster,
objective=config.objective,
random_state=config.random_state,
learning_rate=config.learning_rate,
colsample_bytree=config.colsample_bytree,
eta=config.eta,
max_depth=config.max_depth,
n_estimators=config.n_estimators,
subsample=config.subsample,
min_child_weight=config.min_child_weight)
# Train the model
model.fit(X_train, y_train, group=groups, verbose=True)
# Create df containing the cell_id and the prediction
predict = pd.DataFrame({"cell_id" : df_valid["cell_id"],
"pred" : model.predict(X_valid)}, index = df_valid.index)
# Sort (using the predicted rank) and then group
predict = predict.sort_values(by = ['id', 'pred'], ascending = [False, True])\
.groupby('id')['cell_id'].apply(list)
# Create the same but for actual data
actual = df_valid.sort_values(by = ['id', 'rank'], ascending = [False, True])\
.groupby('id')['cell_id'].apply(list)
# Kendall Metric
metric = kendall_tau(actual, predict)
print(clr.S+"Kendall Tau"+clr.E, metric)
wandb.log({"kendall_tau": np.float(metric)})
try a first baseline experiment
config_defaults = {"tree_method":'hist',
"booster":'gbtree',
"objective":'rank:pairwise',
"random_state":24,
"learning_rate":0.1,
"colsample_bytree":0.9,
"eta":0.05,
"max_depth":6,
"n_estimators":110,
"subsample":0.75,
"min_child_weight":10}
train_XGBRanker(config_defaults)
which returns a kendall_tau
of 0.5479588742699661 (so the metric isnβt null).
and then I am running the Sweeps as follows:
# Sweep Config
sweep_config = {
"method": "random", # grid for all
"metric": {
"name": "kendall_tau",
"goal": "maximize"
},
"parameters": {
"booster": {
"values": ["gbtree","gblinear"]
},
"max_depth": {
"values": [3, 6, 9, 12]
},
"learning_rate": {
"values": [0.1, 0.05, 0.2]
},
"subsample": {
"values": [1, 0.5, 0.3]
}
}
}
# Sweep ID
sweep_id = wandb.sweep(sweep_config, project="AI4Code")
# π RUN SWEEPS
config_defaults = {"tree_method":'hist',
"booster":'gbtree',
"objective":'rank:pairwise',
"random_state":24,
"learning_rate":0.1,
"colsample_bytree":0.9,
"eta":0.05,
"max_depth":6,
"n_estimators":110,
"subsample":0.75,
"min_child_weight":10}
# count = the number of trials to run
wandb.agent(sweep_id, train_XGBRanker(config_defaults), count=8)
Could you please advise? I donβt know if this is related, but I also canβt run the sweeps for more than a count=5
as I get the following error:
Thank you lots!
Andrada
UPDATE:
The issue was from the fact that I was having arguments within the train_XGBRanker()
- moving config_defaults
from outside the function to in the function an then passing it to wandb.agent(sweep_id, train_XGBRanker, count=20)
did the job.