I am using PyTorch in a Docker container, and normal wandb logging works fine. However, sweeps throw a broken pipe error.
detailed output:
self.send_server_request(server_req)
File “/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py”, line 155, in send_server_request
self._sendall_with_error_handle(header + data)
File “/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py”, line 130, in _sendall_with_error_handle
self._send_message(msg)
File “/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py”, line 152, in _send_message
sent = self._sock.send(data)
BrokenPipeError : self._sendall_with_error_handle(header + data)[Errno 32] Broken pipe
File “/opt/conda/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py”, line 130, in _sendall_with_error_handle
sent = self._sock.send(data)
BrokenPipeError: [Errno 32] Broken pipe
wandb: Agent Starting Run: k6kbvtuu with config:
wandb: learning_rate: 0.0004854382775075168
wandb: sampling_frequency: 2
code
# Sweep entry script: parse the options and, when sweeping is enabled, start a
# W&B sweep that runs main() through the wandb_logging helper.
# NOTE(review): the original paste lost its indentation; nesting everything
# below under `if opts.wandb_sweep:` is assumed -- confirm against the real file.
opts = options.parse()
if opts.wandb_sweep:
    wanb_obj = wandb_logging.wandb_logging(opts)
    # Read once, before the sweep starts; main() reuses this same object on
    # every call.
    # NOTE(review): the wandb_logging helper is described as calling
    # wandb.init() itself, i.e. before wandb.agent() runs, while main() does
    # not call wandb.init() -- confirm whether this ordering is related to
    # the BrokenPipeError.
    wandb_config = wanb_obj.get_config()

    def main():
        """Build a Trainer from the parsed options and W&B objects, then train."""
        trainer = Trainer(
            opts,
            wandb_sweep=True,
            wandb_config=wandb_config,
            wandb_obj=wanb_obj,
        )
        trainer.train()

    # Random search over learning rate and sampling frequency, minimizing
    # the "train2_loss" metric.
    sweep_configuration = {
        "method": "random",
        "metric": {"goal": "minimize", "name": "train2_loss"},
        "parameters": {
            "learning_rate": {"max": 1e-3, "min": 1e-8},
            "sampling_frequency": {"values": [1, 2, 3, 4]},
        },
    }

    wanb_obj.startSweep(
        sweep_configuration=sweep_configuration,
        project_name="my-first-sweep",
        function_to_run=main,
        count=5,
    )
My methods:
wandb_logging() has:
wandb.login()
wandb.init(project="my-first-sweep", config=self.config, dir="data/logs")
startSweep() has:
sweep_id = wandb.sweep(sweep=sweep_configuration, project=project_name)
wandb.agent(sweep_id, function=function_to_run, count=count)