Hello, thank you! I am using wandb 0.13.5 with Python 3.9. Note that this is run on my school’s compute cluster, which may have other wandb users. Also note that the issue occurs inconsistently (the same code run twice sometimes fails and sometimes does not). My original workflow is an ML script with a PyTorch training loop which, after reading a .json config file, initializes wandb with the following:
wandb.init(
project=..., entity=..., config=config, mode="offline" if args.disable_wandb else "online"
)
I also set the `WANDB_API_KEY` environment variable to my API key. My program would throw an error on the `wandb.init(...)` call shown above.
To try to fix this, I deleted the `wandb` folder in my project directory and ran `wandb login --relogin` on the command line, which then produced the following error:
Traceback (most recent call last):
File "/home_dir_path/.conda/envs/manifold-contrastive/bin/wandb", line 8, in <module>
sys.exit(cli())
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/storage/home/hcoda1/0/kfallah3/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/cli/cli.py", line 97, in wrapper
return func(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/cli/cli.py", line 236, in login
wandb.setup(settings=login_settings)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 312, in setup
ret = _setup(settings=settings)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 307, in _setup
wl = _WandbSetup(settings=settings)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 293, in __init__
_WandbSetup._instance = _WandbSetup__WandbSetup(settings=settings, pid=pid)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 106, in __init__
self._setup()
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 234, in _setup
self._setup_manager()
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_setup.py", line 265, in _setup_manager
self._manager = wandb_manager._Manager(
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/wandb_manager.py", line 108, in __init__
self._service.start()
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/service/service.py", line 112, in start
self._launch_server()
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/service/service.py", line 108, in _launch_server
assert ports_found
AssertionError
[kfallah@login-phoenix-slurm-3]% Traceback (most recent call last):
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/__main__.py", line 3, in <module>
cli.cli(prog_name="python -m wandb")
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1657, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/cli/cli.py", line 97, in wrapper
return func(*args, **kwargs)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/cli/cli.py", line 282, in service
server.serve()
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/service/server.py", line 130, in serve
self._inform_used_ports(grpc_port=grpc_port, sock_port=sock_port)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/service/server.py", line 65, in _inform_used_ports
pf.write(self._port_fname)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/site-packages/wandb/sdk/service/port_file.py", line 25, in write
f = tempfile.NamedTemporaryFile(prefix=bname, dir=dname, mode="w", delete=False)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/tempfile.py", line 545, in NamedTemporaryFile
(fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type)
File "/home_dir_path/.conda/envs/manifold-contrastive/lib/python3.9/tempfile.py", line 255, in _mkstemp_inner
fd = _os.open(file, flags, 0o600)
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/tmpcmvswfcr/port-105577.txt0fmtukes'
This is potentially a duplicate of GitHub issue #3911 in wandb/wandb: "[CLI]: Can't find port file when using wandb.require("service")".
I have pointed this issue out to the compute cluster administrators. Any potential workarounds I could use for now would be very helpful.