Running wandb version 0.13.11.
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
BrokenPipeError: [Errno 32] Broken pipe
I am receiving a very long traceback that ends in one of the two errors above (the first on Windows, the second on Linux). It occurs whenever I try to run any sweep, and it occurs when connecting from several different machines and IP addresses. Minimal reproducer, followed by the output:
def train():
    """Minimal sweep target for the reproducer: print ``1`` and return.

    Takes no arguments and returns ``None``. It is passed to
    ``wandb.agent`` as the function to execute for each sweep run.
    """
    # NOTE(review): this function neither calls wandb.init() nor logs the
    # 'AUC' metric named in the sweep configuration. Confirm against the
    # W&B sweep docs whether that is relevant to the reported error.
    print(1)
# Sweep definition handed to wandb.sweep(): method 'grid', named 'Sweep',
# with the metric 'AUC' set to 'maximize' and a single parameter 'learn'
# taking the values 1 and 0.1.
sweep_configuration = dict(
    method="grid",
    name="Sweep",
    metric=dict(goal="maximize", name="AUC"),
    parameters=dict(learn=dict(values=[1, 0.1])),
)
# Create the sweep from the configuration above and keep the returned id.
sweep_id = wandb.sweep(sweep=sweep_configuration)

# Start an agent for that sweep, using `train` as the run function and
# passing count=1.
wandb.agent(sweep_id=sweep_id, function=train, count=1)
wandb: Agent Starting Run: gwiij466 with config:
wandb: learn: 1
Exception in thread Thread-9 (_run_job):
Traceback (most recent call last):
File "\Python\Python310\lib\site-packages\wandb\agents\pyagent.py", line 299, in _run_job
wandb.finish()
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 3669, in finish
wandb.run.finish(exit_code=exit_code, quiet=quiet)
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 368, in wrapper
return func(self, *args, **kwargs)
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 331, in wrapper
return func(self, *args, **kwargs)
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 1843, in finish
return self._finish(exit_code, quiet)
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 1850, in _finish
with telemetry.context(run=self) as tel:
File "\Python\Python310\lib\site-packages\wandb\sdk\lib\telemetry.py", line 42, in __exit__
self._run._telemetry_callback(self._obj)
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 689, in _telemetry_callback
self._telemetry_flush()
File "\Python\Python310\lib\site-packages\wandb\sdk\wandb_run.py", line 700, in _telemetry_flush
self._backend.interface._publish_telemetry(self._telemetry_obj)
File "\Python\Python310\lib\site-packages\wandb\sdk\interface\interface_shared.py", line 101, in _publish_telemetry
self._publish(rec)
File "\Python\Python310\lib\site-packages\wandb\sdk\interface\interface_sock.py", line 51, in _publish
self._sock_client.send_record_publish(record)
File "\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py", line 221, in send_record_publish
self.send_server_request(server_req)
File "\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py", line 155, in send_server_request
self._send_message(msg)
File "\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py", line 152, in _send_message
self._sendall_with_error_handle(header + data)
File "\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py", line 130, in _sendall_with_error_handle
sent = self._sock.send(data)
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
During handling of the above exception, another exception occurred:
ConnectionAbortedError Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\backcall\backcall.py:104, in callback_prototype.<locals>.adapt.<locals>.adapted(*args, **kwargs)
102 kwargs.pop(name)
103 # print(args, kwargs, unmatched_pos, cut_positional, unmatched_kw)
--> 104 return callback(*args, **kwargs)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\wandb_init.py:418, in _WandbInit._pause_backend(self)
416 if self.backend.interface is not None:
417 logger.info("pausing backend") # type: ignore
--> 418 self.backend.interface.publish_pause()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\interface\interface.py:665, in InterfaceBase.publish_pause(self)
663 def publish_pause(self) -> None:
664 pause = pb.PauseRequest()
--> 665 self._publish_pause(pause)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\interface\interface_shared.py:340, in InterfaceShared._publish_pause(self, pause)
338 def _publish_pause(self, pause: pb.PauseRequest) -> None:
339 rec = self._make_request(pause=pause)
--> 340 self._publish(rec)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\interface\interface_sock.py:51, in InterfaceSock._publish(self, record, local)
49 def _publish(self, record: "pb.Record", local: Optional[bool] = None) -> None:
50 self._assign(record)
---> 51 self._sock_client.send_record_publish(record)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py:221, in SockClient.send_record_publish(self, record)
219 server_req = spb.ServerRequest()
220 server_req.record_publish.CopyFrom(record)
--> 221 self.send_server_request(server_req)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py:155, in SockClient.send_server_request(self, msg)
154 def send_server_request(self, msg: Any) -> None:
--> 155 self._send_message(msg)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py:152, in SockClient._send_message(self, msg)
150 header = struct.pack("<BI", ord("W"), raw_size)
151 with self._lock:
--> 152 self._sendall_with_error_handle(header + data)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\wandb\sdk\lib\sock_client.py:130, in SockClient._sendall_with_error_handle(self, data)
128 start_time = time.monotonic()
129 try:
--> 130 sent = self._sock.send(data)
131 # sent equal to 0 indicates a closed socket
132 if sent == 0:
ConnectionAbortedError: [WinError 10053] An established connection was aborted by the software in your host machine
wandb: Agent Starting Run: 4maabb7r with config:
wandb: learn: 0.1
Exception in thread Thread-10 (_run_job):
### The same traceback repeats from this point on.