mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 23:25:24 +08:00
Heartbeat table uses gcs pub-sub instead of redis accessor (#8655)
This commit is contained in:
+23
-29
@@ -763,38 +763,33 @@ class GlobalState:
|
||||
|
||||
available_resources_by_id = {}
|
||||
|
||||
subscribe_clients = [
|
||||
redis_client.pubsub(ignore_subscribe_messages=True)
|
||||
for redis_client in self.redis_clients
|
||||
]
|
||||
for subscribe_client in subscribe_clients:
|
||||
subscribe_client.subscribe(gcs_utils.XRAY_HEARTBEAT_CHANNEL)
|
||||
subscribe_client = self.redis_client.pubsub(
|
||||
ignore_subscribe_messages=True)
|
||||
subscribe_client.psubscribe(gcs_utils.XRAY_HEARTBEAT_PATTERN)
|
||||
|
||||
client_ids = self._live_client_ids()
|
||||
|
||||
while set(available_resources_by_id.keys()) != client_ids:
|
||||
for subscribe_client in subscribe_clients:
|
||||
# Parse client message
|
||||
raw_message = subscribe_client.get_message()
|
||||
if (raw_message is None or raw_message["channel"] !=
|
||||
gcs_utils.XRAY_HEARTBEAT_CHANNEL):
|
||||
continue
|
||||
data = raw_message["data"]
|
||||
gcs_entries = gcs_utils.GcsEntry.FromString(data)
|
||||
heartbeat_data = gcs_entries.entries[0]
|
||||
message = gcs_utils.HeartbeatTableData.FromString(
|
||||
heartbeat_data)
|
||||
# Calculate available resources for this client
|
||||
num_resources = len(message.resources_available_label)
|
||||
dynamic_resources = {}
|
||||
for i in range(num_resources):
|
||||
resource_id = message.resources_available_label[i]
|
||||
dynamic_resources[resource_id] = (
|
||||
message.resources_available_capacity[i])
|
||||
# Parse client message
|
||||
raw_message = subscribe_client.get_message()
|
||||
if (raw_message is None or raw_message["pattern"] !=
|
||||
gcs_utils.XRAY_HEARTBEAT_PATTERN):
|
||||
continue
|
||||
data = raw_message["data"]
|
||||
pub_message = gcs_utils.PubSubMessage.FromString(data)
|
||||
heartbeat_data = pub_message.data
|
||||
message = gcs_utils.HeartbeatTableData.FromString(heartbeat_data)
|
||||
# Calculate available resources for this client
|
||||
num_resources = len(message.resources_available_label)
|
||||
dynamic_resources = {}
|
||||
for i in range(num_resources):
|
||||
resource_id = message.resources_available_label[i]
|
||||
dynamic_resources[resource_id] = (
|
||||
message.resources_available_capacity[i])
|
||||
|
||||
# Update available resources for this client
|
||||
client_id = ray.utils.binary_to_hex(message.client_id)
|
||||
available_resources_by_id[client_id] = dynamic_resources
|
||||
# Update available resources for this client
|
||||
client_id = ray.utils.binary_to_hex(message.client_id)
|
||||
available_resources_by_id[client_id] = dynamic_resources
|
||||
|
||||
# Update clients in cluster
|
||||
client_ids = self._live_client_ids()
|
||||
@@ -811,8 +806,7 @@ class GlobalState:
|
||||
total_available_resources[resource_id] += num_available
|
||||
|
||||
# Close the pubsub clients to avoid leaking file descriptors.
|
||||
for subscribe_client in subscribe_clients:
|
||||
subscribe_client.close()
|
||||
subscribe_client.close()
|
||||
|
||||
return dict(total_available_resources)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user