mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 03:18:59 +08:00
Fix frequent failure of Jenkins CI. (#2490)
This commit is contained in:
committed by
Robert Nishihara
parent
d8ba667175
commit
d2ebe4d9a3
@@ -37,14 +37,19 @@ def _wait_for_nodes_to_join(num_nodes, timeout=20):
|
||||
ready = True
|
||||
# Check that for each node, a local scheduler and a plasma manager
|
||||
# are present.
|
||||
for ip_address, clients in client_table.items():
|
||||
client_types = [client["ClientType"] for client in clients]
|
||||
if "local_scheduler" not in client_types:
|
||||
ready = False
|
||||
if "plasma_manager" not in client_types:
|
||||
ready = False
|
||||
if ready:
|
||||
if ray.global_state.use_raylet:
|
||||
# In raylet mode, this is a list of map.
|
||||
# The GCS info will appear as a whole instead of part by part.
|
||||
return
|
||||
else:
|
||||
for ip_address, clients in client_table.items():
|
||||
client_types = [client["ClientType"] for client in clients]
|
||||
if "local_scheduler" not in client_types:
|
||||
ready = False
|
||||
if "plasma_manager" not in client_types:
|
||||
ready = False
|
||||
if ready:
|
||||
return
|
||||
if num_ready_nodes > num_nodes:
|
||||
# Too many nodes have joined. Something must be wrong.
|
||||
raise Exception("{} nodes have joined the cluster, but we were "
|
||||
|
||||
@@ -1411,7 +1411,12 @@ def get_address_info_from_redis_helper(redis_address,
|
||||
(client_node_ip_address == "127.0.0.1"
|
||||
and redis_ip_address == ray.services.get_node_ip_address())):
|
||||
raylets.append(client)
|
||||
|
||||
# Make sure that at least one raylet has started locally.
|
||||
# This handles a race condition where Redis has started but
|
||||
# the raylet has not connected.
|
||||
if len(raylets) == 0:
|
||||
raise Exception(
|
||||
"Redis has started but no raylets have registered yet.")
|
||||
object_store_addresses = [
|
||||
services.ObjectStoreAddress(
|
||||
name=ray.utils.decode(raylet.ObjectStoreSocketName()),
|
||||
|
||||
Reference in New Issue
Block a user