mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 00:46:14 +08:00
[ID Refactor] Rename DriverID to JobID (#5004)
* WIP WIP WIP Rename Driver -> Job Fix compilation Fix Rename in Java In py WIP Fix WIP Fix Fix test Fix Fix C++ linting Fix * Update java/runtime/src/main/java/org/ray/runtime/config/RayConfig.java Co-Authored-By: Stephanie Wang <swang@cs.berkeley.edu> * Update src/ray/core_worker/core_worker.cc Co-Authored-By: Stephanie Wang <swang@cs.berkeley.edu> * Address comments * Fix * Fix CI * Fix cpp linting * Fix py lint * Fix * Address comments and fix * Address comments * Address * Fix import_threading
This commit is contained in:
+23
-23
@@ -130,14 +130,14 @@ class Monitor(object):
|
||||
"Monitor: "
|
||||
"could not find ip for client {}".format(client_id))
|
||||
|
||||
def _xray_clean_up_entries_for_driver(self, driver_id):
|
||||
"""Remove this driver's object/task entries from redis.
|
||||
def _xray_clean_up_entries_for_job(self, job_id):
|
||||
"""Remove this job's object/task entries from redis.
|
||||
|
||||
Removes control-state entries of all tasks and task return
|
||||
objects belonging to the driver.
|
||||
|
||||
Args:
|
||||
driver_id: The driver id.
|
||||
job_id: The job id.
|
||||
"""
|
||||
|
||||
xray_task_table_prefix = (
|
||||
@@ -146,23 +146,23 @@ class Monitor(object):
|
||||
ray.gcs_utils.TablePrefix_OBJECT_string.encode("ascii"))
|
||||
|
||||
task_table_objects = ray.tasks()
|
||||
driver_id_hex = binary_to_hex(driver_id)
|
||||
driver_task_id_bins = set()
|
||||
job_id_hex = binary_to_hex(job_id)
|
||||
job_task_id_bins = set()
|
||||
for task_id_hex, task_info in task_table_objects.items():
|
||||
task_table_object = task_info["TaskSpec"]
|
||||
task_driver_id_hex = task_table_object["DriverID"]
|
||||
if driver_id_hex != task_driver_id_hex:
|
||||
task_job_id_hex = task_table_object["JobID"]
|
||||
if job_id_hex != task_job_id_hex:
|
||||
# Ignore tasks that aren't from this driver.
|
||||
continue
|
||||
driver_task_id_bins.add(hex_to_binary(task_id_hex))
|
||||
job_task_id_bins.add(hex_to_binary(task_id_hex))
|
||||
|
||||
# Get objects associated with the driver.
|
||||
object_table_objects = ray.objects()
|
||||
driver_object_id_bins = set()
|
||||
job_object_id_bins = set()
|
||||
for object_id, _ in object_table_objects.items():
|
||||
task_id_bin = ray._raylet.compute_task_id(object_id).binary()
|
||||
if task_id_bin in driver_task_id_bins:
|
||||
driver_object_id_bins.add(object_id.binary())
|
||||
if task_id_bin in job_task_id_bins:
|
||||
job_object_id_bins.add(object_id.binary())
|
||||
|
||||
def to_shard_index(id_bin):
|
||||
if len(id_bin) == ray.TaskID.size():
|
||||
@@ -174,10 +174,10 @@ class Monitor(object):
|
||||
|
||||
# Form the redis keys to delete.
|
||||
sharded_keys = [[] for _ in range(len(ray.state.state.redis_clients))]
|
||||
for task_id_bin in driver_task_id_bins:
|
||||
for task_id_bin in job_task_id_bins:
|
||||
sharded_keys[to_shard_index(task_id_bin)].append(
|
||||
xray_task_table_prefix + task_id_bin)
|
||||
for object_id_bin in driver_object_id_bins:
|
||||
for object_id_bin in job_object_id_bins:
|
||||
sharded_keys[to_shard_index(object_id_bin)].append(
|
||||
xray_object_table_prefix + object_id_bin)
|
||||
|
||||
@@ -198,21 +198,21 @@ class Monitor(object):
|
||||
"entries from redis shard {}.".format(
|
||||
len(keys) - num_deleted, shard_index))
|
||||
|
||||
def xray_driver_removed_handler(self, unused_channel, data):
|
||||
"""Handle a notification that a driver has been removed.
|
||||
def xray_job_removed_handler(self, unused_channel, data):
|
||||
"""Handle a notification that a job has been removed.
|
||||
|
||||
Args:
|
||||
unused_channel: The message channel.
|
||||
data: The message data.
|
||||
"""
|
||||
gcs_entries = ray.gcs_utils.GcsEntry.FromString(data)
|
||||
driver_data = gcs_entries.entries[0]
|
||||
message = ray.gcs_utils.DriverTableData.FromString(driver_data)
|
||||
driver_id = message.driver_id
|
||||
job_data = gcs_entries.entries[0]
|
||||
message = ray.gcs_utils.JobTableData.FromString(job_data)
|
||||
job_id = message.job_id
|
||||
logger.info("Monitor: "
|
||||
"XRay Driver {} has been removed.".format(
|
||||
binary_to_hex(driver_id)))
|
||||
self._xray_clean_up_entries_for_driver(driver_id)
|
||||
binary_to_hex(job_id)))
|
||||
self._xray_clean_up_entries_for_job(job_id)
|
||||
|
||||
def process_messages(self, max_messages=10000):
|
||||
"""Process all messages ready in the subscription channels.
|
||||
@@ -240,9 +240,9 @@ class Monitor(object):
|
||||
if channel == ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL:
|
||||
# Similar functionality as raylet info channel
|
||||
message_handler = self.xray_heartbeat_batch_handler
|
||||
elif channel == ray.gcs_utils.XRAY_DRIVER_CHANNEL:
|
||||
elif channel == ray.gcs_utils.XRAY_JOB_CHANNEL:
|
||||
# Handles driver death.
|
||||
message_handler = self.xray_driver_removed_handler
|
||||
message_handler = self.xray_job_removed_handler
|
||||
else:
|
||||
raise Exception("This code should be unreachable.")
|
||||
|
||||
@@ -298,7 +298,7 @@ class Monitor(object):
|
||||
"""
|
||||
# Initialize the subscription channel.
|
||||
self.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL)
|
||||
self.subscribe(ray.gcs_utils.XRAY_DRIVER_CHANNEL)
|
||||
self.subscribe(ray.gcs_utils.XRAY_JOB_CHANNEL)
|
||||
|
||||
# TODO(rkn): If there were any dead clients at startup, we should clean
|
||||
# up the associated state in the state tables.
|
||||
|
||||
Reference in New Issue
Block a user