Fix frequent failure of Jenkins CI. (#2490)

This commit is contained in:
Yuhong Guo
2018-08-03 01:28:28 +08:00
committed by Robert Nishihara
parent d8ba667175
commit d2ebe4d9a3
5 changed files with 63 additions and 31 deletions
+12 -7
View File
@@ -37,14 +37,19 @@ def _wait_for_nodes_to_join(num_nodes, timeout=20):
ready = True
# Check that for each node, a local scheduler and a plasma manager
# are present.
for ip_address, clients in client_table.items():
client_types = [client["ClientType"] for client in clients]
if "local_scheduler" not in client_types:
ready = False
if "plasma_manager" not in client_types:
ready = False
if ready:
if ray.global_state.use_raylet:
# In raylet mode, this is a list of maps.
# The GCS info will appear as a whole instead of part by part.
return
else:
for ip_address, clients in client_table.items():
client_types = [client["ClientType"] for client in clients]
if "local_scheduler" not in client_types:
ready = False
if "plasma_manager" not in client_types:
ready = False
if ready:
return
if num_ready_nodes > num_nodes:
# Too many nodes have joined. Something must be wrong.
raise Exception("{} nodes have joined the cluster, but we were "