From db7097fb1ff4c321f14e0c59e1d2f9bdaad55bb3 Mon Sep 17 00:00:00 2001 From: "DK.Pino" Date: Mon, 28 Sep 2020 01:24:21 +0800 Subject: [PATCH] [Refactor] Rename ClientId to NodeId (#10992) * rename ClientId to NodeId * format lint * format lint * fix conflicts * rename new ClientId to NodeId * update lint * make same version of clang-format with travis ci --- python/ray/__init__.py | 4 +- python/ray/_raylet.pyx | 8 +- python/ray/experimental/dynamic_resources.py | 10 +- python/ray/includes/global_state_accessor.pxd | 4 +- python/ray/includes/global_state_accessor.pxi | 4 +- python/ray/includes/libcoreworker.pxd | 6 +- python/ray/includes/unique_ids.pxd | 4 +- python/ray/includes/unique_ids.pxi | 12 +- python/ray/state.py | 2 +- src/ray/common/id_def.h | 2 +- src/ray/common/task/task.h | 2 +- src/ray/core_worker/actor_manager.cc | 3 +- src/ray/core_worker/core_worker.cc | 24 ++-- src/ray/core_worker/core_worker.h | 8 +- .../java/io_ray_runtime_RayNativeRuntime.cc | 2 +- .../io_ray_runtime_gcs_GlobalStateAccessor.cc | 2 +- .../io_ray_runtime_task_NativeTaskExecutor.cc | 2 +- .../core_worker/object_recovery_manager.cc | 6 +- src/ray/core_worker/object_recovery_manager.h | 2 +- src/ray/core_worker/reference_count.cc | 31 ++--- src/ray/core_worker/reference_count.h | 32 +++-- src/ray/core_worker/reference_count_test.cc | 10 +- src/ray/core_worker/task_manager.cc | 2 +- src/ray/core_worker/task_manager.h | 4 +- .../core_worker/test/actor_manager_test.cc | 2 +- .../test/direct_task_transport_test.cc | 112 +++++++++--------- .../test/object_recovery_manager_test.cc | 4 +- src/ray/core_worker/test/task_manager_test.cc | 2 +- .../transport/direct_task_transport.cc | 4 +- .../transport/direct_task_transport.h | 8 +- src/ray/gcs/accessor.h | 27 ++--- .../gcs/gcs_client/global_state_accessor.cc | 2 +- .../gcs/gcs_client/global_state_accessor.h | 2 +- .../gcs/gcs_client/service_based_accessor.cc | 44 ++++--- .../gcs/gcs_client/service_based_accessor.h | 35 +++--- 
.../test/global_state_accessor_test.cc | 8 +- .../test/service_based_gcs_client_test.cc | 44 +++---- src/ray/gcs/gcs_server/gcs_actor_manager.cc | 28 ++--- src/ray/gcs/gcs_server/gcs_actor_manager.h | 23 ++-- src/ray/gcs/gcs_server/gcs_actor_scheduler.cc | 15 ++- src/ray/gcs/gcs_server/gcs_actor_scheduler.h | 29 +++-- src/ray/gcs/gcs_server/gcs_node_manager.cc | 36 +++--- src/ray/gcs/gcs_server/gcs_node_manager.h | 32 ++--- src/ray/gcs/gcs_server/gcs_object_manager.cc | 22 ++-- src/ray/gcs/gcs_server/gcs_object_manager.h | 16 +-- .../gcs_server/gcs_placement_group_manager.cc | 4 +- .../gcs_server/gcs_placement_group_manager.h | 2 +- .../gcs_placement_group_scheduler.cc | 30 ++--- .../gcs_placement_group_scheduler.h | 35 +++--- src/ray/gcs/gcs_server/gcs_server.cc | 6 +- src/ray/gcs/gcs_server/gcs_table_storage.cc | 8 +- src/ray/gcs/gcs_server/gcs_table_storage.h | 8 +- src/ray/gcs/gcs_server/gcs_worker_manager.cc | 2 +- src/ray/gcs/gcs_server/stats_handler_impl.cc | 3 +- .../gcs/gcs_server/task_info_handler_impl.cc | 5 +- .../gcs_server/test/gcs_actor_manager_test.cc | 41 ++++--- .../test/gcs_actor_scheduler_test.cc | 46 +++---- .../gcs_server/test/gcs_node_manager_test.cc | 8 +- .../test/gcs_object_manager_test.cc | 16 +-- .../test/gcs_placement_group_manager_test.cc | 12 +- .../gcs_placement_group_scheduler_test.cc | 12 +- .../gcs_server/test/gcs_server_rpc_test.cc | 8 +- .../gcs_server/test/gcs_server_test_util.h | 32 ++--- src/ray/gcs/pb_util.h | 4 +- src/ray/gcs/redis_accessor.cc | 71 ++++++----- src/ray/gcs/redis_accessor.h | 61 +++++----- src/ray/gcs/redis_context.cc | 2 +- src/ray/gcs/redis_context.h | 2 +- src/ray/gcs/redis_gcs_client.cc | 4 +- src/ray/gcs/subscription_executor.cc | 15 ++- src/ray/gcs/subscription_executor.h | 6 +- src/ray/gcs/tables.cc | 51 ++++---- src/ray/gcs/tables.h | 60 +++++----- src/ray/gcs/test/gcs_test_util.h | 8 +- src/ray/gcs/test/redis_gcs_client_test.cc | 44 +++---- .../gcs/test/redis_node_info_accessor_test.cc | 12 +- 
.../test/redis_object_info_accessor_test.cc | 6 +- .../gcs/test/subscription_executor_test.cc | 18 +-- src/ray/object_manager/object_directory.cc | 14 +-- src/ray/object_manager/object_directory.h | 22 ++-- src/ray/object_manager/object_manager.cc | 37 +++--- src/ray/object_manager/object_manager.h | 39 +++--- .../ownership_based_object_directory.cc | 16 +-- .../ownership_based_object_directory.h | 4 +- src/ray/object_manager/plasma/common.h | 4 +- src/ray/object_manager/plasma/protocol.cc | 4 +- src/ray/object_manager/plasma/protocol.h | 2 +- src/ray/object_manager/plasma/store.cc | 4 +- src/ray/object_manager/plasma/store.h | 2 +- .../test/object_manager_stress_test.cc | 24 ++-- .../test/object_manager_test.cc | 22 ++-- src/ray/raylet/actor_registration.cc | 4 +- src/ray/raylet/actor_registration.h | 2 +- src/ray/raylet/agent_manager.h | 2 +- src/ray/raylet/format/node_manager.fbs | 2 +- src/ray/raylet/node_manager.cc | 50 ++++---- src/ray/raylet/node_manager.h | 26 ++-- .../raylet/object_manager_integration_test.cc | 16 +-- src/ray/raylet/raylet.cc | 2 +- src/ray/raylet/raylet.h | 2 +- src/ray/raylet/reconstruction_policy.cc | 6 +- src/ray/raylet/reconstruction_policy.h | 4 +- src/ray/raylet/reconstruction_policy_test.cc | 30 ++--- .../raylet/scheduling/cluster_task_manager.cc | 6 +- .../raylet/scheduling/cluster_task_manager.h | 9 +- .../scheduling/cluster_task_manager_test.cc | 6 +- src/ray/raylet/scheduling_policy.cc | 34 +++--- src/ray/raylet/scheduling_policy.h | 12 +- src/ray/raylet_client/raylet_client.cc | 7 +- src/ray/raylet_client/raylet_client.h | 10 +- src/ray/rpc/worker/core_worker_client.h | 4 +- 111 files changed, 862 insertions(+), 889 deletions(-) diff --git a/python/ray/__init__.py b/python/ray/__init__.py index 10918f0d0..d6550d83f 100644 --- a/python/ray/__init__.py +++ b/python/ray/__init__.py @@ -66,7 +66,7 @@ from ray._raylet import ( ActorCheckpointID, ActorClassID, ActorID, - ClientID, + NodeID, Config as _Config, JobID, WorkerID, @@ 
-151,7 +151,7 @@ __all__ += [ "ActorCheckpointID", "ActorClassID", "ActorID", - "ClientID", + "NodeID", "JobID", "WorkerID", "FunctionID", diff --git a/python/ray/_raylet.pyx b/python/ray/_raylet.pyx index 94734be64..88204569e 100644 --- a/python/ray/_raylet.pyx +++ b/python/ray/_raylet.pyx @@ -76,7 +76,7 @@ from ray.includes.unique_ids cimport ( CActorID, CActorCheckpointID, CObjectID, - CClientID, + CNodeID, CPlacementGroupID, ) from ray.includes.libcoreworker cimport ( @@ -784,7 +784,7 @@ cdef class CoreWorker: CCoreWorkerProcess.GetCoreWorker().GetCurrentJobId().Binary()) def get_current_node_id(self): - return ClientID( + return NodeID( CCoreWorkerProcess.GetCoreWorker().GetCurrentNodeId().Binary()) def get_actor_id(self): @@ -1479,10 +1479,10 @@ cdef class CoreWorker: actor_id.native(), checkpoint_id.native())) def set_resource(self, basestring resource_name, - double capacity, ClientID client_id): + double capacity, NodeID client_id): CCoreWorkerProcess.GetCoreWorker().SetResource( resource_name.encode("ascii"), capacity, - CClientID.FromBinary(client_id.binary())) + CNodeID.FromBinary(client_id.binary())) def force_spill_objects(self, object_refs): cdef c_vector[CObjectID] object_ids diff --git a/python/ray/experimental/dynamic_resources.py b/python/ray/experimental/dynamic_resources.py index 7eb5cc056..ca7c20740 100644 --- a/python/ray/experimental/dynamic_resources.py +++ b/python/ray/experimental/dynamic_resources.py @@ -4,17 +4,17 @@ import ray def set_resource(resource_name, capacity, client_id=None): """ Set a resource to a specified capacity. - This creates, updates or deletes a custom resource for a target clientId. + This creates, updates or deletes a custom resource for a target NodeID. If the resource already exists, it's capacity is updated to the new value. If the capacity is set to 0, the resource is deleted. 
- If ClientID is not specified or set to None, + If NodeID is not specified or set to None, the resource is created on the local client where the actor is running. Args: resource_name (str): Name of the resource to be created capacity (int): Capacity of the new resource. Resource is deleted if capacity is 0. - client_id (str): The ClientId of the node where the resource is to be + client_id (str): The NodeID of the node where the resource is to be set. Returns: @@ -25,9 +25,9 @@ def set_resource(resource_name, capacity, client_id=None): specified. """ if client_id is not None: - client_id_obj = ray.ClientID(ray.utils.hex_to_binary(client_id)) + client_id_obj = ray.NodeID(ray.utils.hex_to_binary(client_id)) else: - client_id_obj = ray.ClientID.nil() + client_id_obj = ray.NodeID.nil() if (capacity < 0) or (capacity != int(capacity)): raise ValueError( "Capacity {} must be a non-negative integer.".format(capacity)) diff --git a/python/ray/includes/global_state_accessor.pxd b/python/ray/includes/global_state_accessor.pxd index dce948389..34d34e97b 100644 --- a/python/ray/includes/global_state_accessor.pxd +++ b/python/ray/includes/global_state_accessor.pxd @@ -4,7 +4,7 @@ from libcpp.vector cimport vector as c_vector from libcpp.memory cimport unique_ptr from ray.includes.unique_ids cimport ( CActorID, - CClientID, + CNodeID, CObjectID, CWorkerID, CPlacementGroupID, @@ -24,7 +24,7 @@ cdef extern from "ray/gcs/gcs_client/global_state_accessor.h" nogil: unique_ptr[c_string] GetObjectInfo(const CObjectID &object_id) c_vector[c_string] GetAllActorInfo() unique_ptr[c_string] GetActorInfo(const CActorID &actor_id) - c_string GetNodeResourceInfo(const CClientID &node_id) + c_string GetNodeResourceInfo(const CNodeID &node_id) unique_ptr[c_string] GetWorkerInfo(const CWorkerID &worker_id) c_vector[c_string] GetAllWorkerInfo() c_bool AddWorkerInfo(const c_string &serialized_string) diff --git a/python/ray/includes/global_state_accessor.pxi 
b/python/ray/includes/global_state_accessor.pxi index bda49ac20..db9e66fbd 100644 --- a/python/ray/includes/global_state_accessor.pxi +++ b/python/ray/includes/global_state_accessor.pxi @@ -1,6 +1,6 @@ from ray.includes.unique_ids cimport ( CActorID, - CClientID, + CNodeID, CObjectID, CWorkerID, CPlacementGroupID @@ -89,7 +89,7 @@ cdef class GlobalStateAccessor: def get_node_resource_info(self, node_id): cdef c_string result - cdef CClientID cnode_id = CClientID.FromBinary(node_id.binary()) + cdef CNodeID cnode_id = CNodeID.FromBinary(node_id.binary()) with nogil: result = self.inner.get().GetNodeResourceInfo(cnode_id) return result diff --git a/python/ray/includes/libcoreworker.pxd b/python/ray/includes/libcoreworker.pxd index 57e8a62f5..82e47824a 100644 --- a/python/ray/includes/libcoreworker.pxd +++ b/python/ray/includes/libcoreworker.pxd @@ -14,7 +14,7 @@ from libcpp.vector cimport vector as c_vector from ray.includes.unique_ids cimport ( CActorID, CActorCheckpointID, - CClientID, + CNodeID, CJobID, CTaskID, CObjectID, @@ -121,7 +121,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil: CJobID GetCurrentJobId() CTaskID GetCurrentTaskId() - CClientID GetCurrentNodeId() + CNodeID GetCurrentNodeId() CPlacementGroupID GetCurrentPlacementGroupId() const CActorID &GetActorId() void SetActorTitle(const c_string &title) @@ -196,7 +196,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil: const CActorID &actor_id, const CActorCheckpointID &checkpoint_id) CRayStatus SetResource(const c_string &resource_name, const double capacity, - const CClientID &client_Id) + const CNodeID &client_Id) CRayStatus SpillObjects(const c_vector[CObjectID] &object_ids) CRayStatus ForceRestoreSpilledObjects( const c_vector[CObjectID] &object_ids) diff --git a/python/ray/includes/unique_ids.pxd b/python/ray/includes/unique_ids.pxd index a33505e90..e79da124d 100644 --- a/python/ray/includes/unique_ids.pxd +++ b/python/ray/includes/unique_ids.pxd @@ -65,10 +65,10 @@ cdef extern 
from "ray/common/id.h" namespace "ray" nogil: CActorID Of(CJobID job_id, CTaskID parent_task_id, int64_t parent_task_counter) - cdef cppclass CClientID "ray::ClientID"(CUniqueID): + cdef cppclass CNodeID "ray::NodeID"(CUniqueID): @staticmethod - CClientID FromBinary(const c_string &binary) + CNodeID FromBinary(const c_string &binary) cdef cppclass CConfigID "ray::ConfigID"(CUniqueID): diff --git a/python/ray/includes/unique_ids.pxi b/python/ray/includes/unique_ids.pxi index 5c9901040..409c44382 100644 --- a/python/ray/includes/unique_ids.pxi +++ b/python/ray/includes/unique_ids.pxi @@ -12,7 +12,7 @@ from ray.includes.unique_ids cimport ( CActorCheckpointID, CActorClassID, CActorID, - CClientID, + CNodeID, CConfigID, CJobID, CFunctionID, @@ -199,14 +199,14 @@ cdef class TaskID(BaseID): CTaskID.FromBinary(parent_task_id.binary()), parent_task_counter).Binary()) -cdef class ClientID(UniqueID): +cdef class NodeID(UniqueID): def __init__(self, id): check_id(id) - self.data = CClientID.FromBinary(id) + self.data = CNodeID.FromBinary(id) - cdef CClientID native(self): - return self.data + cdef CNodeID native(self): + return self.data cdef class JobID(BaseID): @@ -373,7 +373,7 @@ _ID_TYPES = [ ActorCheckpointID, ActorClassID, ActorID, - ClientID, + NodeID, JobID, WorkerID, FunctionID, diff --git a/python/ray/state.py b/python/ray/state.py index 40aa839ad..579df4179 100644 --- a/python/ray/state.py +++ b/python/ray/state.py @@ -266,7 +266,7 @@ class GlobalState: """ self._check_connected() - node_id = ray.ClientID(hex_to_binary(node_id)) + node_id = ray.NodeID(hex_to_binary(node_id)) node_resource_bytes = \ self.global_state_accessor.get_node_resource_info(node_id) if node_resource_bytes is None: diff --git a/src/ray/common/id_def.h b/src/ray/common/id_def.h index 6e17bdf61..ef82aba3b 100644 --- a/src/ray/common/id_def.h +++ b/src/ray/common/id_def.h @@ -23,4 +23,4 @@ DEFINE_UNIQUE_ID(ActorClassID) DEFINE_UNIQUE_ID(ActorCheckpointID) DEFINE_UNIQUE_ID(WorkerID) 
DEFINE_UNIQUE_ID(ConfigID) -DEFINE_UNIQUE_ID(ClientID) +DEFINE_UNIQUE_ID(NodeID) diff --git a/src/ray/common/task/task.h b/src/ray/common/task/task.h index 282a641f1..800dc1d31 100644 --- a/src/ray/common/task/task.h +++ b/src/ray/common/task/task.h @@ -13,7 +13,7 @@ typedef std::function, const std::string &, int DispatchTaskCallback; /// Arguments are the raylet ID to spill back to, the raylet's /// address and the raylet's port. -typedef std::function +typedef std::function SpillbackTaskCallback; typedef std::function CancelTaskCallback; diff --git a/src/ray/core_worker/actor_manager.cc b/src/ray/core_worker/actor_manager.cc index c92bdc605..e6ef4fc87 100644 --- a/src/ray/core_worker/actor_manager.cc +++ b/src/ray/core_worker/actor_manager.cc @@ -129,8 +129,7 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id, << ", ip address: " << actor_data.address().ip_address() << ", port: " << actor_data.address().port() << ", worker_id: " << WorkerID::FromBinary(actor_data.address().worker_id()) - << ", raylet_id: " - << ClientID::FromBinary(actor_data.address().raylet_id()) + << ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id()) << ", num_restarts: " << actor_data.num_restarts(); if (actor_data.state() == gcs::ActorTableData::RESTARTING) { direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false); diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc index 1fda56147..f4662a4c2 100644 --- a/src/ray/core_worker/core_worker.cc +++ b/src/ray/core_worker/core_worker.cc @@ -302,7 +302,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ auto grpc_client = rpc::NodeManagerWorkerClient::make( options_.raylet_ip_address, options_.node_manager_port, *client_call_manager_); Status raylet_client_status; - ClientID local_raylet_id; + NodeID local_raylet_id; int assigned_port; std::unordered_map system_config; local_raylet_client_ = 
std::shared_ptr(new raylet::RayletClient( @@ -352,7 +352,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ RegisterToGcs(); // Register a callback to monitor removed nodes. - auto on_node_change = [this](const ClientID &node_id, const rpc::GcsNodeInfo &data) { + auto on_node_change = [this](const NodeID &node_id, const rpc::GcsNodeInfo &data) { if (data.state() == rpc::GcsNodeInfo::DEAD) { OnNodeRemoved(data); } @@ -395,7 +395,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ options_.ref_counting_enabled ? reference_counter_ : nullptr, local_raylet_client_, options_.check_signals)); - auto check_node_alive_fn = [this](const ClientID &node_id) { + auto check_node_alive_fn = [this](const NodeID &node_id) { auto node = gcs_client_->Nodes().Get(node_id); if (!node) { return false; @@ -500,7 +500,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ RAY_CHECK_OK(status); std::vector locations; for (const auto &result : results) { - const auto &node_id = ClientID::FromBinary(result.manager()); + const auto &node_id = NodeID::FromBinary(result.manager()); auto node = gcs_client_->Nodes().Get(node_id); RAY_CHECK(node.has_value()); if (node->state() == rpc::GcsNodeInfo::ALIVE) { @@ -617,7 +617,7 @@ void CoreWorker::RunIOService() { } void CoreWorker::OnNodeRemoved(const rpc::GcsNodeInfo &node_info) { - const auto node_id = ClientID::FromBinary(node_info.node_id()); + const auto node_id = NodeID::FromBinary(node_info.node_id()); RAY_LOG(INFO) << "Node failure " << node_id; const auto lost_objects = reference_counter_->ResetObjectsOnRemovedNode(node_id); // Delete the objects from the in-memory store to indicate that they are not @@ -825,7 +825,7 @@ Status CoreWorker::Put(const RayObject &object, worker_context_.GetNextPutIndex()); reference_counter_->AddOwnedObject( *object_id, contained_object_ids, rpc_address_, CurrentCallSite(), object.GetSize(), - 
/*is_reconstructable=*/false, ClientID::FromBinary(rpc_address_.raylet_id())); + /*is_reconstructable=*/false, NodeID::FromBinary(rpc_address_.raylet_id())); return Put(object, contained_object_ids, *object_id, /*pin_object=*/true); } @@ -884,7 +884,7 @@ Status CoreWorker::Create(const std::shared_ptr &metadata, const size_t reference_counter_->AddOwnedObject(*object_id, contained_object_ids, rpc_address_, CurrentCallSite(), data_size + metadata->Size(), /*is_reconstructable=*/false, - ClientID::FromBinary(rpc_address_.raylet_id())); + NodeID::FromBinary(rpc_address_.raylet_id())); } return Status::OK(); } @@ -1153,7 +1153,7 @@ Status CoreWorker::NotifyActorResumedFromCheckpoint( } Status CoreWorker::SetResource(const std::string &resource_name, const double capacity, - const ClientID &client_id) { + const NodeID &client_id) { return local_raylet_client_->SetResource(resource_name, capacity, client_id); } @@ -1167,7 +1167,7 @@ void CoreWorker::SpillOwnedObject(const ObjectID &object_id, } // Find the raylet that hosts the primary copy of the object. 
- ClientID pinned_at; + NodeID pinned_at; RAY_CHECK(reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at)); auto node = gcs_client_->Nodes().Get(pinned_at); if (pinned_at.IsNil() || !node) { @@ -2023,7 +2023,7 @@ void CoreWorker::HandleAddObjectLocationOwner( return; } reference_counter_->AddObjectLocation(ObjectID::FromBinary(request.object_id()), - ClientID::FromBinary(request.client_id())); + NodeID::FromBinary(request.client_id())); send_reply_callback(Status::OK(), nullptr, nullptr); } @@ -2036,7 +2036,7 @@ void CoreWorker::HandleRemoveObjectLocationOwner( return; } reference_counter_->RemoveObjectLocation(ObjectID::FromBinary(request.object_id()), - ClientID::FromBinary(request.client_id())); + NodeID::FromBinary(request.client_id())); send_reply_callback(Status::OK(), nullptr, nullptr); } @@ -2048,7 +2048,7 @@ void CoreWorker::HandleGetObjectLocationsOwner( send_reply_callback)) { return; } - std::unordered_set client_ids = + std::unordered_set client_ids = reference_counter_->GetObjectLocations(ObjectID::FromBinary(request.object_id())); for (const auto &client_id : client_ids) { reply->add_client_ids(client_id.Binary()); diff --git a/src/ray/core_worker/core_worker.h b/src/ray/core_worker/core_worker.h index 638abd61e..d0adf054f 100644 --- a/src/ray/core_worker/core_worker.h +++ b/src/ray/core_worker/core_worker.h @@ -350,9 +350,7 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler { const JobID &GetCurrentJobId() const { return worker_context_.GetCurrentJobID(); } - ClientID GetCurrentNodeId() const { - return ClientID::FromBinary(rpc_address_.raylet_id()); - } + NodeID GetCurrentNodeId() const { return NodeID::FromBinary(rpc_address_.raylet_id()); } const PlacementGroupID &GetCurrentPlacementGroupId() const { return worker_context_.GetCurrentPlacementGroupId(); @@ -617,10 +615,10 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler { /// Sets a resource with the specified capacity and client id /// \param[in] resource_name Name of 
the resource to be set. /// \param[in] capacity Capacity of the resource. - /// \param[in] client_Id ClientID where the resource is to be set. + /// \param[in] client_Id NodeID where the resource is to be set. /// \return Status Status SetResource(const std::string &resource_name, const double capacity, - const ClientID &client_id); + const NodeID &client_id); /// Request an object to be spilled to external storage. /// \param[in] object_ids The objects to be spilled. diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_RayNativeRuntime.cc b/src/ray/core_worker/lib/java/io_ray_runtime_RayNativeRuntime.cc index 035809662..ad9d5b170 100644 --- a/src/ray/core_worker/lib/java/io_ray_runtime_RayNativeRuntime.cc +++ b/src/ray/core_worker/lib/java/io_ray_runtime_RayNativeRuntime.cc @@ -252,7 +252,7 @@ JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeShutdown(JNIEn JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeSetResource( JNIEnv *env, jclass, jstring resourceName, jdouble capacity, jbyteArray nodeId) { - const auto node_id = JavaByteArrayToId(env, nodeId); + const auto node_id = JavaByteArrayToId(env, nodeId); const char *native_resource_name = env->GetStringUTFChars(resourceName, JNI_FALSE); auto status = ray::CoreWorkerProcess::GetCoreWorker().SetResource( diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc b/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc index 4c2e145d2..8fe43deef 100644 --- a/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc +++ b/src/ray/core_worker/lib/java/io_ray_runtime_gcs_GlobalStateAccessor.cc @@ -76,7 +76,7 @@ Java_io_ray_runtime_gcs_GlobalStateAccessor_nativeGetNodeResourceInfo( JNIEnv *env, jobject o, jlong gcs_accessor_ptr, jbyteArray node_id_bytes) { auto *gcs_accessor = reinterpret_cast(gcs_accessor_ptr); - auto node_id = JavaByteArrayToId(env, node_id_bytes); + auto node_id = JavaByteArrayToId(env, node_id_bytes); auto 
node_resource_info = gcs_accessor->GetNodeResourceInfo(node_id); return static_cast(NativeStringToJavaByteArray(env, node_resource_info)); } diff --git a/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc b/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc index 2fc4e33ba..b4403b8cb 100644 --- a/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc +++ b/src/ray/core_worker/lib/java/io_ray_runtime_task_NativeTaskExecutor.cc @@ -26,7 +26,7 @@ extern "C" { #endif -using ray::ClientID; +using ray::NodeID; JNIEXPORT jbyteArray JNICALL Java_io_ray_runtime_task_NativeTaskExecutor_nativePrepareCheckpoint(JNIEnv *env, jclass) { diff --git a/src/ray/core_worker/object_recovery_manager.cc b/src/ray/core_worker/object_recovery_manager.cc index 6e7004e4d..c5cdd5054 100644 --- a/src/ray/core_worker/object_recovery_manager.cc +++ b/src/ray/core_worker/object_recovery_manager.cc @@ -20,7 +20,7 @@ namespace ray { Status ObjectRecoveryManager::RecoverObject(const ObjectID &object_id) { // Check the ReferenceCounter to see if there is a location for the object. - ClientID pinned_at; + NodeID pinned_at; bool owned_by_us = reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at); if (!owned_by_us) { return Status::Invalid( @@ -80,12 +80,12 @@ void ObjectRecoveryManager::PinExistingObjectCopy( const std::vector &other_locations) { // If a copy still exists, pin the object by sending a // PinObjectIDs RPC. 
- const auto node_id = ClientID::FromBinary(raylet_address.raylet_id()); + const auto node_id = NodeID::FromBinary(raylet_address.raylet_id()); RAY_LOG(DEBUG) << "Trying to pin copy of lost object " << object_id << " at node " << node_id; std::shared_ptr client; - if (node_id == ClientID::FromBinary(rpc_address_.raylet_id())) { + if (node_id == NodeID::FromBinary(rpc_address_.raylet_id())) { client = local_object_pinning_client_; } else { absl::MutexLock lock(&mu_); diff --git a/src/ray/core_worker/object_recovery_manager.h b/src/ray/core_worker/object_recovery_manager.h index a4b04c004..c43f2de03 100644 --- a/src/ray/core_worker/object_recovery_manager.h +++ b/src/ray/core_worker/object_recovery_manager.h @@ -137,7 +137,7 @@ class ObjectRecoveryManager { mutable absl::Mutex mu_; /// Cache of gRPC clients to remote raylets for pinning objects. - absl::flat_hash_map> + absl::flat_hash_map> remote_object_pinning_clients_ GUARDED_BY(mu_); /// Objects that are currently pending recovery. Calls to RecoverObject for diff --git a/src/ray/core_worker/reference_count.cc b/src/ray/core_worker/reference_count.cc index 2376799ee..81f3040e6 100644 --- a/src/ray/core_worker/reference_count.cc +++ b/src/ray/core_worker/reference_count.cc @@ -144,11 +144,12 @@ void ReferenceCounter::AddObjectRefStats( } } -void ReferenceCounter::AddOwnedObject( - const ObjectID &object_id, const std::vector &inner_ids, - const rpc::Address &owner_address, const std::string &call_site, - const int64_t object_size, bool is_reconstructable, - const absl::optional &pinned_at_raylet_id) { +void ReferenceCounter::AddOwnedObject(const ObjectID &object_id, + const std::vector &inner_ids, + const rpc::Address &owner_address, + const std::string &call_site, + const int64_t object_size, bool is_reconstructable, + const absl::optional &pinned_at_raylet_id) { RAY_LOG(DEBUG) << "Adding owned object " << object_id; absl::MutexLock lock(&mutex_); RAY_CHECK(object_id_refs_.count(object_id) == 0) @@ -504,12 +505,12 
@@ bool ReferenceCounter::SetDeleteCallback( } std::vector ReferenceCounter::ResetObjectsOnRemovedNode( - const ClientID &raylet_id) { + const NodeID &raylet_id) { absl::MutexLock lock(&mutex_); std::vector lost_objects; for (auto it = object_id_refs_.begin(); it != object_id_refs_.end(); it++) { const auto &object_id = it->first; - if (it->second.pinned_at_raylet_id.value_or(ClientID::Nil()) == raylet_id) { + if (it->second.pinned_at_raylet_id.value_or(NodeID::Nil()) == raylet_id) { lost_objects.push_back(object_id); ReleasePlasmaObject(it); } @@ -518,7 +519,7 @@ std::vector ReferenceCounter::ResetObjectsOnRemovedNode( } void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id, - const ClientID &raylet_id) { + const NodeID &raylet_id) { absl::MutexLock lock(&mutex_); auto it = object_id_refs_.find(object_id); if (it != object_id_refs_.end()) { @@ -539,12 +540,12 @@ void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id, } bool ReferenceCounter::IsPlasmaObjectPinned(const ObjectID &object_id, - ClientID *pinned_at) const { + NodeID *pinned_at) const { absl::MutexLock lock(&mutex_); auto it = object_id_refs_.find(object_id); if (it != object_id_refs_.end()) { if (it->second.owned_by_us) { - *pinned_at = it->second.pinned_at_raylet_id.value_or(ClientID::Nil()); + *pinned_at = it->second.pinned_at_raylet_id.value_or(NodeID::Nil()); return true; } } @@ -890,29 +891,29 @@ void ReferenceCounter::SetReleaseLineageCallback( } void ReferenceCounter::AddObjectLocation(const ObjectID &object_id, - const ClientID &node_id) { + const NodeID &node_id) { absl::MutexLock lock(&mutex_); auto it = object_id_locations_.find(object_id); if (it == object_id_locations_.end()) { - it = object_id_locations_.emplace(object_id, absl::flat_hash_set()).first; + it = object_id_locations_.emplace(object_id, absl::flat_hash_set()).first; } it->second.insert(node_id); } void ReferenceCounter::RemoveObjectLocation(const ObjectID &object_id, - const ClientID 
&node_id) { + const NodeID &node_id) { absl::MutexLock lock(&mutex_); auto it = object_id_locations_.find(object_id); RAY_CHECK(it != object_id_locations_.end()); it->second.erase(node_id); } -std::unordered_set ReferenceCounter::GetObjectLocations( +std::unordered_set ReferenceCounter::GetObjectLocations( const ObjectID &object_id) { absl::MutexLock lock(&mutex_); auto it = object_id_locations_.find(object_id); RAY_CHECK(it != object_id_locations_.end()); - std::unordered_set locations; + std::unordered_set locations; for (const auto &location : it->second) { locations.insert(location); } diff --git a/src/ray/core_worker/reference_count.h b/src/ray/core_worker/reference_count.h index 1b40874c3..b77a9a7cb 100644 --- a/src/ray/core_worker/reference_count.h +++ b/src/ray/core_worker/reference_count.h @@ -36,13 +36,11 @@ class ReferenceCounterInterface { const std::string &call_site) = 0; virtual bool AddBorrowedObject(const ObjectID &object_id, const ObjectID &outer_id, const rpc::Address &owner_address) = 0; - virtual void AddOwnedObject(const ObjectID &object_id, - const std::vector &contained_ids, - const rpc::Address &owner_address, - const std::string &call_site, const int64_t object_size, - bool is_reconstructable, - const absl::optional &pinned_at_raylet_id = - absl::optional()) = 0; + virtual void AddOwnedObject( + const ObjectID &object_id, const std::vector &contained_ids, + const rpc::Address &owner_address, const std::string &call_site, + const int64_t object_size, bool is_reconstructable, + const absl::optional &pinned_at_raylet_id = absl::optional()) = 0; virtual bool SetDeleteCallback( const ObjectID &object_id, const std::function callback) = 0; @@ -169,7 +167,7 @@ class ReferenceCounter : public ReferenceCounterInterface { const ObjectID &object_id, const std::vector &contained_ids, const rpc::Address &owner_address, const std::string &call_site, const int64_t object_size, bool is_reconstructable, - const absl::optional &pinned_at_raylet_id = 
absl::optional()) + const absl::optional &pinned_at_raylet_id = absl::optional()) LOCKS_EXCLUDED(mutex_); /// Update the size of the object. @@ -321,7 +319,7 @@ class ReferenceCounter : public ReferenceCounterInterface { /// /// \param[in] object_id The object to update. /// \param[in] raylet_id The raylet that is now pinning the object ID. - void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const ClientID &raylet_id) + void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const NodeID &raylet_id) LOCKS_EXCLUDED(mutex_); /// Check whether the object is pinned at a remote plasma store node. @@ -332,7 +330,7 @@ class ReferenceCounter : public ReferenceCounterInterface { /// \return True if the object exists and is owned by us, false otherwise. We /// return false here because a borrower should not know the pinned location /// for an object. - bool IsPlasmaObjectPinned(const ObjectID &object_id, ClientID *pinned_at) const + bool IsPlasmaObjectPinned(const ObjectID &object_id, NodeID *pinned_at) const LOCKS_EXCLUDED(mutex_); /// Get and reset the objects that were pinned on the given node. This @@ -342,7 +340,7 @@ class ReferenceCounter : public ReferenceCounterInterface { /// /// \param[in] node_id The node whose object store has been removed. /// \return The set of objects that were pinned on the given node. - std::vector ResetObjectsOnRemovedNode(const ClientID &raylet_id); + std::vector ResetObjectsOnRemovedNode(const NodeID &raylet_id); /// Whether we have a reference to a particular ObjectID. /// @@ -361,21 +359,21 @@ class ReferenceCounter : public ReferenceCounterInterface { /// /// \param[in] object_id The object to update. /// \param[in] node_id The node to be added to the location table. - void AddObjectLocation(const ObjectID &object_id, const ClientID &node_id) + void AddObjectLocation(const ObjectID &object_id, const NodeID &node_id) LOCKS_EXCLUDED(mutex_); /// Remove location from the location table of the given object. 
/// /// \param[in] object_id The object to update. /// \param[in] node_id The node to be removed from the location table. - void RemoveObjectLocation(const ObjectID &object_id, const ClientID &node_id) + void RemoveObjectLocation(const ObjectID &object_id, const NodeID &node_id) LOCKS_EXCLUDED(mutex_); /// Get the locations from the location table of the given object. /// /// \param[in] object_id The object to get locations for. /// \return The nodes that have the object. - std::unordered_set GetObjectLocations(const ObjectID &object_id) + std::unordered_set GetObjectLocations(const ObjectID &object_id) LOCKS_EXCLUDED(mutex_); private: @@ -387,7 +385,7 @@ class ReferenceCounter : public ReferenceCounterInterface { /// Constructor for a reference that we created. Reference(const rpc::Address &owner_address, std::string call_site, const int64_t object_size, bool is_reconstructable, - const absl::optional &pinned_at_raylet_id) + const absl::optional &pinned_at_raylet_id) : call_site(call_site), object_size(object_size), owned_by_us(true), @@ -461,7 +459,7 @@ class ReferenceCounter : public ReferenceCounterInterface { // If this object is owned by us and stored in plasma, and reference // counting is enabled, then some raylet must be pinning the object value. // This is the address of that raylet. - absl::optional pinned_at_raylet_id; + absl::optional pinned_at_raylet_id; // Whether this object can be reconstructed via lineage. If false, then the // object's value will be pinned as long as it is referenced by any other // object's lineage. @@ -680,7 +678,7 @@ class ReferenceCounter : public ReferenceCounterInterface { /// Holds all reference counts and dependency information for tracked ObjectIDs. ReferenceTable object_id_refs_ GUARDED_BY(mutex_); - using LocationTable = absl::flat_hash_map>; + using LocationTable = absl::flat_hash_map>; /// Holds the client information for the owned objects. 
This table is seperate from /// the reference table because we add object reference after putting object into the diff --git a/src/ray/core_worker/reference_count_test.cc b/src/ray/core_worker/reference_count_test.cc index 605233439..6fd68699d 100644 --- a/src/ray/core_worker/reference_count_test.cc +++ b/src/ray/core_worker/reference_count_test.cc @@ -56,7 +56,7 @@ class MockWorkerClient : public rpc::CoreWorkerClientInterface { rpc::Address CreateRandomAddress(const std::string &addr) { rpc::Address address; address.set_ip_address(addr); - address.set_raylet_id(ClientID::FromRandom().Binary()); + address.set_raylet_id(NodeID::FromRandom().Binary()); address.set_worker_id(WorkerID::FromRandom().Binary()); return address; } @@ -1986,11 +1986,11 @@ TEST_F(ReferenceCountLineageEnabledTest, TestPlasmaLocation) { ObjectID borrowed_id = ObjectID::FromRandom(); rc->AddLocalReference(borrowed_id, ""); - ClientID pinned_at; + NodeID pinned_at; ASSERT_FALSE(rc->IsPlasmaObjectPinned(borrowed_id, &pinned_at)); ObjectID id = ObjectID::FromRandom(); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true); rc->AddLocalReference(id, ""); ASSERT_TRUE(rc->SetDeleteCallback(id, callback)); @@ -2023,7 +2023,7 @@ TEST_F(ReferenceCountTest, TestFree) { auto callback = [&](const ObjectID &object_id) { deleted->insert(object_id); }; ObjectID id = ObjectID::FromRandom(); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); // Test free before receiving information about where the object is pinned. 
rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true); @@ -2034,7 +2034,7 @@ TEST_F(ReferenceCountTest, TestFree) { ASSERT_FALSE(rc->SetDeleteCallback(id, callback)); ASSERT_EQ(deleted->count(id), 0); rc->UpdateObjectPinnedAtRaylet(id, node_id); - ClientID pinned_at; + NodeID pinned_at; ASSERT_TRUE(rc->IsPlasmaObjectPinned(id, &pinned_at)); ASSERT_TRUE(pinned_at.IsNil()); ASSERT_TRUE(rc->IsPlasmaObjectFreed(id)); diff --git a/src/ray/core_worker/task_manager.cc b/src/ray/core_worker/task_manager.cc index 8d1fc2eeb..2dcaa047c 100644 --- a/src/ray/core_worker/task_manager.cc +++ b/src/ray/core_worker/task_manager.cc @@ -183,7 +183,7 @@ void TaskManager::CompletePendingTask(const TaskID &task_id, reference_counter_->UpdateObjectSize(object_id, return_object.size()); if (return_object.in_plasma()) { - const auto pinned_at_raylet_id = ClientID::FromBinary(worker_addr.raylet_id()); + const auto pinned_at_raylet_id = NodeID::FromBinary(worker_addr.raylet_id()); if (check_node_alive_(pinned_at_raylet_id)) { reference_counter_->UpdateObjectPinnedAtRaylet(object_id, pinned_at_raylet_id); // Mark it as in plasma with a dummy object. diff --git a/src/ray/core_worker/task_manager.h b/src/ray/core_worker/task_manager.h index 83038c009..83dadf0ee 100644 --- a/src/ray/core_worker/task_manager.h +++ b/src/ray/core_worker/task_manager.h @@ -58,7 +58,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa TaskManager(std::shared_ptr in_memory_store, std::shared_ptr reference_counter, RetryTaskCallback retry_task_callback, - const std::function &check_node_alive, + const std::function &check_node_alive, ReconstructObjectCallback reconstruct_object_callback) : in_memory_store_(in_memory_store), reference_counter_(reference_counter), @@ -238,7 +238,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa /// processing a worker's reply to check whether the node that the worker /// was on is still alive. 
If the node is down, the plasma objects returned by the task /// are marked as failed. - const std::function check_node_alive_; + const std::function check_node_alive_; /// Called when processing a worker's reply if the node that the worker was /// on died. This should be called to attempt to recover a plasma object /// returned by the task (or store an error if the object is not diff --git a/src/ray/core_worker/test/actor_manager_test.cc b/src/ray/core_worker/test/actor_manager_test.cc index 6d8ea0ee8..06cb9a70e 100644 --- a/src/ray/core_worker/test/actor_manager_test.cc +++ b/src/ray/core_worker/test/actor_manager_test.cc @@ -101,7 +101,7 @@ class MockReferenceCounter : public ReferenceCounterInterface { void(const ObjectID &object_id, const std::vector &contained_ids, const rpc::Address &owner_address, const std::string &call_site, const int64_t object_size, bool is_reconstructable, - const absl::optional &pinned_at_raylet_id)); + const absl::optional &pinned_at_raylet_id)); MOCK_METHOD2(SetDeleteCallback, bool(const ObjectID &object_id, diff --git a/src/ray/core_worker/test/direct_task_transport_test.cc b/src/ray/core_worker/test/direct_task_transport_test.cc index 83ab5cdd7..f1ab2ab48 100644 --- a/src/ray/core_worker/test/direct_task_transport_test.cc +++ b/src/ray/core_worker/test/direct_task_transport_test.cc @@ -120,7 +120,7 @@ class MockRayletClient : public WorkerLeaseInterface { // Trigger reply to RequestWorkerLease. 
bool GrantWorkerLease(const std::string &address, int port, - const ClientID &retry_at_raylet_id, bool cancel = false) { + const NodeID &retry_at_raylet_id, bool cancel = false) { rpc::RequestWorkerLeaseReply reply; if (cancel) { reply.set_canceled(true); @@ -341,7 +341,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; @@ -354,7 +354,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) { ASSERT_EQ(raylet_client->num_workers_returned, 0); ASSERT_EQ(worker_client->callbacks.size(), 0); - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); ASSERT_EQ(task_finisher->num_tasks_complete, 0); ASSERT_EQ(task_finisher->num_tasks_failed, 0); @@ -382,7 +382,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -390,7 +390,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) { TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor); ASSERT_TRUE(submitter.SubmitTask(task).ok()); - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil())); // Simulate a system failure, i.e., worker died unexpectedly. 
ASSERT_TRUE(worker_client->ReplyPushTask(Status::IOError("oops"))); ASSERT_EQ(worker_client->callbacks.size(), 0); @@ -416,7 +416,7 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -431,17 +431,17 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed; worker 2 is requested. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); ASSERT_EQ(raylet_client->num_workers_requested, 2); // Task 2 is pushed; worker 3 is requested. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 2); ASSERT_EQ(raylet_client->num_workers_requested, 3); // Task 3 is pushed; no more workers requested. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 3); ASSERT_EQ(raylet_client->num_workers_requested, 3); @@ -471,7 +471,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -486,7 +486,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); ASSERT_EQ(raylet_client->num_workers_requested, 2); ASSERT_EQ(raylet_client->num_leases_canceled, 0); @@ -508,7 +508,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) { ASSERT_EQ(raylet_client->num_workers_returned, 1); // The second lease request is returned immediately. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 0); ASSERT_EQ(raylet_client->num_workers_returned, 2); ASSERT_EQ(raylet_client->num_workers_disconnected, 0); @@ -532,7 +532,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -547,7 +547,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); // Task 1 finishes, Task 2 is scheduled on the same worker. ASSERT_TRUE(worker_client->ReplyPushTask()); // Task 2 finishes, Task 3 is scheduled on the same worker. @@ -569,7 +569,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) { ASSERT_EQ(raylet_client->num_leases_canceled, i); ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease()); ASSERT_EQ(raylet_client->num_leases_canceled, i); - ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true)); + ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true)); ASSERT_EQ(worker_client->callbacks.size(), 0); // The canceled lease is not returned. 
ASSERT_EQ(raylet_client->num_workers_returned, 1); @@ -592,7 +592,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -605,7 +605,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) { ASSERT_TRUE(submitter.SubmitTask(task2).ok()); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(raylet_client->num_workers_requested, 2); // Task 1 finishes, Task 2 is scheduled on the same worker. ASSERT_TRUE(worker_client->ReplyPushTask()); @@ -624,11 +624,11 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) { // Task 2's lease request is canceled, a new worker is requested for task 3. ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease()); ASSERT_EQ(raylet_client->num_workers_requested, 2); - ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true)); + ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true)); ASSERT_EQ(raylet_client->num_workers_requested, 3); // Task 3 finishes, all workers returned. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_TRUE(worker_client->ReplyPushTask()); ASSERT_EQ(raylet_client->num_workers_returned, 2); ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease()); @@ -649,7 +649,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -662,7 +662,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); ASSERT_EQ(raylet_client->num_workers_requested, 2); @@ -673,7 +673,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) { ASSERT_EQ(raylet_client->num_workers_disconnected, 1); // Task 2 runs successfully on the second worker. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_TRUE(worker_client->ReplyPushTask()); ASSERT_EQ(raylet_client->num_workers_returned, 1); ASSERT_EQ(raylet_client->num_workers_disconnected, 1); @@ -697,7 +697,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -708,7 +708,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); // Task 1 finishes with exit status; the worker is not returned. @@ -745,7 +745,7 @@ TEST(DirectTaskTransportTest, TestSpillback) { auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, lease_client_factory, store, task_finisher, - ClientID::Nil(), kLongTimeout, actor_creator); + NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = ray::FunctionDescriptorBuilder::BuildPython("", "", "", ""); @@ -758,14 +758,14 @@ TEST(DirectTaskTransportTest, TestSpillback) { ASSERT_EQ(remote_lease_clients.size(), 0); // Spillback to a remote node. 
- auto remote_raylet_id = ClientID::FromRandom(); + auto remote_raylet_id = NodeID::FromRandom(); ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id)); ASSERT_EQ(remote_lease_clients.count(7777), 1); // There should be no more callbacks on the local client. - ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil())); + ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil())); // Trigger retry at the remote node. ASSERT_TRUE( - remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil())); + remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil())); // The worker is returned to the remote node, not the local one. ASSERT_TRUE(worker_client->ReplyPushTask()); @@ -804,7 +804,7 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) { return client; }; auto task_finisher = std::make_shared(); - auto local_raylet_id = ClientID::FromRandom(); + auto local_raylet_id = NodeID::FromRandom(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, lease_client_factory, store, task_finisher, @@ -821,10 +821,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) { ASSERT_EQ(remote_lease_clients.size(), 0); // Spillback to a remote node. - auto remote_raylet_id = ClientID::FromRandom(); + auto remote_raylet_id = NodeID::FromRandom(); ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id)); ASSERT_EQ(remote_lease_clients.count(7777), 1); - ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil())); + ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil())); // Trigger a spillback back to the local node. 
ASSERT_TRUE( remote_lease_clients[7777]->GrantWorkerLease("local", 1234, local_raylet_id)); @@ -832,10 +832,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) { ASSERT_EQ(remote_lease_clients.size(), 1); // There should be no more callbacks on the remote node. ASSERT_FALSE( - remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil())); + remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil())); // The worker is returned to the local node. - ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, NodeID::Nil())); ASSERT_TRUE(worker_client->ReplyPushTask()); ASSERT_EQ(raylet_client->num_workers_returned, 1); ASSERT_EQ(remote_lease_clients[7777]->num_workers_returned, 0); @@ -868,7 +868,7 @@ void TestSchedulingKey(const std::shared_ptr store, auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); ASSERT_TRUE(submitter.SubmitTask(same1).ok()); @@ -877,7 +877,7 @@ void TestSchedulingKey(const std::shared_ptr store, ASSERT_EQ(raylet_client->num_workers_requested, 2); // same1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 1); // Another worker is requested because same2 is pending. ASSERT_EQ(raylet_client->num_workers_requested, 3); @@ -893,7 +893,7 @@ void TestSchedulingKey(const std::shared_ptr store, ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease()); // different is pushed. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 2); ASSERT_EQ(raylet_client->num_workers_requested, 3); @@ -910,7 +910,7 @@ void TestSchedulingKey(const std::shared_ptr store, ASSERT_EQ(raylet_client->num_leases_canceled, 1); // Trigger reply to RequestWorkerLease to remove the canceled pending lease request - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil(), true)); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil(), true)); ASSERT_EQ(raylet_client->num_workers_returned, 2); // Check that there are no entries left in the scheduling_key_entries_ hashmap. These @@ -994,7 +994,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), /*lease_timeout_ms=*/5, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -1009,7 +1009,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1 is pushed. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(raylet_client->num_workers_requested, 2); // Task 1 finishes with failure; the worker is returned due to the error even though @@ -1020,7 +1020,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) { // Task 2 runs successfully on the second worker; the worker is returned due to the // timeout. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); std::this_thread::sleep_for( std::chrono::milliseconds(10)); // Sleep for 10ms, causing the lease to time out. ASSERT_TRUE(worker_client->ReplyPushTask()); @@ -1029,7 +1029,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) { // Task 3 runs successfully on the third worker; the worker is returned even though it // hasn't timed out. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil())); ASSERT_TRUE(worker_client->ReplyPushTask()); ASSERT_EQ(worker_client->callbacks.size(), 0); ASSERT_EQ(raylet_client->num_workers_returned, 2); @@ -1053,7 +1053,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -1061,7 +1061,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) { TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor); ASSERT_TRUE(submitter.SubmitTask(task).ok()); - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil())); // Try force kill, exiting the worker ASSERT_TRUE(submitter.CancelTask(task, true).ok()); @@ -1077,7 +1077,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) { task.GetMutableMessage().set_task_id( TaskID::ForNormalTask(JobID::Nil(), TaskID::Nil(), 1).Binary()); ASSERT_TRUE(submitter.SubmitTask(task).ok()); - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, 
ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil())); // Try non-force kill, worker returns normally ASSERT_TRUE(submitter.CancelTask(task, false).ok()); @@ -1105,7 +1105,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -1124,7 +1124,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) { ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease()); // Trigger reply to RequestWorkerLease to remove the canceled pending lease request - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil(), true)); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil(), true)); // Check that there are no entries left in the scheduling_key_entries_ hashmap. These // would otherwise cause a memory leak. @@ -1141,7 +1141,7 @@ TEST(DirectTaskTransportTest, TestKillResolvingTask) { auto task_finisher = std::make_shared(); auto actor_creator = std::make_shared(); CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr, - store, task_finisher, ClientID::Nil(), + store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator); std::unordered_map empty_resources; ray::FunctionDescriptor empty_descriptor = @@ -1181,7 +1181,7 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) { // parameter to the CoreWorkerDirectTaskSubmitter. 
uint32_t max_tasks_in_flight_per_worker = 10; CoreWorkerDirectTaskSubmitter submitter( - address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(), + address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator, max_tasks_in_flight_per_worker); // Prepare 20 tasks and save them in a vector. @@ -1201,12 +1201,12 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // First 10 tasks are pushed; worker 2 is requested. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 10); ASSERT_EQ(raylet_client->num_workers_requested, 2); // Last 10 tasks are pushed; no more workers are requested. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 20); ASSERT_EQ(raylet_client->num_workers_requested, 2); @@ -1255,7 +1255,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) { // parameter to the CoreWorkerDirectTaskSubmitter. uint32_t max_tasks_in_flight_per_worker = 10; CoreWorkerDirectTaskSubmitter submitter( - address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(), + address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator, max_tasks_in_flight_per_worker); // prepare 30 tasks and save them in a vector @@ -1275,7 +1275,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) { ASSERT_EQ(raylet_client->num_workers_requested, 1); // Task 1-10 are pushed, and a new worker is requested. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 10); ASSERT_EQ(raylet_client->num_workers_requested, 2); // The lease is not cancelled, as there is more work to do @@ -1305,7 +1305,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) { ASSERT_EQ(raylet_client->num_workers_returned, 1); // The second lease request is returned immediately. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(worker_client->callbacks.size(), 0); ASSERT_EQ(raylet_client->num_workers_returned, 2); ASSERT_EQ(raylet_client->num_workers_disconnected, 0); @@ -1334,7 +1334,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) { // parameter to the CoreWorkerDirectTaskSubmitter. uint32_t max_tasks_in_flight_per_worker = 10; CoreWorkerDirectTaskSubmitter submitter( - address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(), + address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(), kLongTimeout, actor_creator, max_tasks_in_flight_per_worker); // prepare 30 tasks and save them in a vector @@ -1361,7 +1361,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) { ASSERT_EQ(worker_client->callbacks.size(), 0); // Grant a worker lease, and check that still only 1 worker was requested. 
- ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil())); ASSERT_EQ(raylet_client->num_workers_requested, 1); ASSERT_EQ(raylet_client->num_workers_returned, 0); ASSERT_EQ(raylet_client->num_workers_disconnected, 0); @@ -1399,7 +1399,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) { ASSERT_EQ(worker_client->callbacks.size(), 10); // Grant a worker lease, and check that still only 2 workers were requested. - ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil())); + ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil())); ASSERT_EQ(raylet_client->num_workers_requested, 2); ASSERT_EQ(raylet_client->num_workers_returned, 0); ASSERT_EQ(raylet_client->num_workers_disconnected, 0); diff --git a/src/ray/core_worker/test/object_recovery_manager_test.cc b/src/ray/core_worker/test/object_recovery_manager_test.cc index 201fddbec..da29ab908 100644 --- a/src/ray/core_worker/test/object_recovery_manager_test.cc +++ b/src/ray/core_worker/test/object_recovery_manager_test.cc @@ -104,7 +104,7 @@ class MockObjectDirectory { class ObjectRecoveryManagerTest : public ::testing::Test { public: ObjectRecoveryManagerTest() - : local_raylet_id_(ClientID::FromRandom()), + : local_raylet_id_(NodeID::FromRandom()), object_directory_(std::make_shared()), memory_store_(std::make_shared()), raylet_client_(std::make_shared()), @@ -137,7 +137,7 @@ class ObjectRecoveryManagerTest : public ::testing::Test { }, /*lineage_reconstruction_enabled=*/true) {} - ClientID local_raylet_id_; + NodeID local_raylet_id_; std::unordered_map failed_reconstructions_; std::shared_ptr object_directory_; diff --git a/src/ray/core_worker/test/task_manager_test.cc b/src/ray/core_worker/test/task_manager_test.cc index 82cbfd287..21c21da9a 100644 --- a/src/ray/core_worker/test/task_manager_test.cc +++ 
b/src/ray/core_worker/test/task_manager_test.cc @@ -46,7 +46,7 @@ class TaskManagerTest : public ::testing::Test { num_retries_++; return Status::OK(); }, - [this](const ClientID &node_id) { return all_nodes_alive_; }, + [this](const NodeID &node_id) { return all_nodes_alive_; }, [this](const ObjectID &object_id) { objects_to_recover_.push_back(object_id); }) {} diff --git a/src/ray/core_worker/transport/direct_task_transport.cc b/src/ray/core_worker/transport/direct_task_transport.cc index 3895cfbbe..9db414299 100644 --- a/src/ray/core_worker/transport/direct_task_transport.cc +++ b/src/ray/core_worker/transport/direct_task_transport.cc @@ -233,9 +233,9 @@ CoreWorkerDirectTaskSubmitter::GetOrConnectLeaseClient( const rpc::Address *raylet_address) { std::shared_ptr lease_client; if (raylet_address && - ClientID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) { + NodeID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) { // A remote raylet was specified. Connect to the raylet if needed. 
- ClientID raylet_id = ClientID::FromBinary(raylet_address->raylet_id()); + NodeID raylet_id = NodeID::FromBinary(raylet_address->raylet_id()); auto it = remote_lease_clients_.find(raylet_id); if (it == remote_lease_clients_.end()) { RAY_LOG(DEBUG) << "Connecting to raylet " << raylet_id; diff --git a/src/ray/core_worker/transport/direct_task_transport.h b/src/ray/core_worker/transport/direct_task_transport.h index 25985e03f..244e6b5e2 100644 --- a/src/ray/core_worker/transport/direct_task_transport.h +++ b/src/ray/core_worker/transport/direct_task_transport.h @@ -55,7 +55,7 @@ class CoreWorkerDirectTaskSubmitter { std::shared_ptr core_worker_client_pool, LeaseClientFactoryFn lease_client_factory, std::shared_ptr store, - std::shared_ptr task_finisher, ClientID local_raylet_id, + std::shared_ptr task_finisher, NodeID local_raylet_id, int64_t lease_timeout_ms, std::shared_ptr actor_creator, uint32_t max_tasks_in_flight_per_worker = RayConfig::instance().max_tasks_in_flight_per_worker(), @@ -154,8 +154,8 @@ class CoreWorkerDirectTaskSubmitter { std::shared_ptr local_lease_client_; /// Cache of gRPC clients to remote raylets. - absl::flat_hash_map> - remote_lease_clients_ GUARDED_BY(mu_); + absl::flat_hash_map> remote_lease_clients_ + GUARDED_BY(mu_); /// Factory for producing new clients to request leases from remote nodes. LeaseClientFactoryFn lease_client_factory_; @@ -172,7 +172,7 @@ class CoreWorkerDirectTaskSubmitter { /// The local raylet ID. Used to make sure that we use the local lease client /// if a remote raylet tells us to spill the task back to the local raylet. - const ClientID local_raylet_id_; + const NodeID local_raylet_id_; /// Interface for actor creation. std::shared_ptr actor_creator_; diff --git a/src/ray/gcs/accessor.h b/src/ray/gcs/accessor.h index 2624f9e68..1e29a5f53 100644 --- a/src/ray/gcs/accessor.h +++ b/src/ray/gcs/accessor.h @@ -370,7 +370,7 @@ class ObjectInfoAccessor { /// \param node_id The location that will be added to GCS. 
/// \param callback Callback that will be called after object has been added to GCS. /// \return Status - virtual Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id, + virtual Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) = 0; /// Remove location of object from GCS asynchronously. @@ -379,7 +379,7 @@ class ObjectInfoAccessor { /// \param node_id The location that will be removed from GCS. /// \param callback Callback that will be called after the delete finished. /// \return Status - virtual Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id, + virtual Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) = 0; /// Subscribe to any update of an object's location. @@ -434,8 +434,8 @@ class NodeInfoAccessor { /// Get id of local node which was registered by 'RegisterSelf'. /// - /// \return ClientID - virtual const ClientID &GetSelfId() const = 0; + /// \return NodeID + virtual const NodeID &GetSelfId() const = 0; /// Get information of local node which was registered by 'RegisterSelf'. /// @@ -455,7 +455,7 @@ class NodeInfoAccessor { /// \param node_id The ID of node that to be unregistered. /// \param callback Callback that will be called when unregistration is complete. /// \return Status - virtual Status AsyncUnregister(const ClientID &node_id, + virtual Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) = 0; /// Get information of all nodes from GCS asynchronously. @@ -472,7 +472,7 @@ class NodeInfoAccessor { /// \param done Callback that will be called when subscription is complete. /// \return Status virtual Status AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) = 0; /// Get node information from local cache. 
@@ -483,7 +483,7 @@ class NodeInfoAccessor { /// \param node_id The ID of node to look up in local cache. /// \return The item returned by GCS. If the item to read doesn't exist, /// this optional object is empty. - virtual boost::optional Get(const ClientID &node_id) const = 0; + virtual boost::optional Get(const NodeID &node_id) const = 0; /// Get information of all nodes from local cache. /// Non-thread safe. @@ -491,7 +491,7 @@ class NodeInfoAccessor { /// is called before. /// /// \return All nodes in cache. - virtual const std::unordered_map &GetAll() const = 0; + virtual const std::unordered_map &GetAll() const = 0; /// Search the local cache to find out if the given node is removed. /// Non-thread safe. @@ -500,7 +500,7 @@ class NodeInfoAccessor { /// /// \param node_id The id of the node to check. /// \return Whether the node is removed. - virtual bool IsRemoved(const ClientID &node_id) const = 0; + virtual bool IsRemoved(const NodeID &node_id) const = 0; // TODO(micafan) Define ResourceMap in GCS proto. typedef std::unordered_map> @@ -511,7 +511,7 @@ class NodeInfoAccessor { /// \param node_id The ID of node to lookup dynamic resources. /// \param callback Callback that will be called after lookup finishes. /// \return Status - virtual Status AsyncGetResources(const ClientID &node_id, + virtual Status AsyncGetResources(const NodeID &node_id, const OptionalItemCallback &callback) = 0; /// Update resources of node in GCS asynchronously. @@ -519,8 +519,7 @@ class NodeInfoAccessor { /// \param node_id The ID of node to update dynamic resources. /// \param resources The dynamic resources of node to be updated. /// \param callback Callback that will be called after update finishes. - virtual Status AsyncUpdateResources(const ClientID &node_id, - const ResourceMap &resources, + virtual Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) = 0; /// Delete resources of a node from GCS asynchronously. 
@@ -528,7 +527,7 @@ class NodeInfoAccessor { /// \param node_id The ID of node to delete resources from GCS. /// \param resource_names The names of resource to be deleted. /// \param callback Callback that will be called after delete finishes. - virtual Status AsyncDeleteResources(const ClientID &node_id, + virtual Status AsyncDeleteResources(const NodeID &node_id, const std::vector &resource_names, const StatusCallback &callback) = 0; @@ -560,7 +559,7 @@ class NodeInfoAccessor { /// \param done Callback that will be called when subscription is complete. /// \return Status virtual Status AsyncSubscribeHeartbeat( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) = 0; /// Report state of all nodes to GCS asynchronously. diff --git a/src/ray/gcs/gcs_client/global_state_accessor.cc b/src/ray/gcs/gcs_client/global_state_accessor.cc index a4d8cfa28..f59f55937 100644 --- a/src/ray/gcs/gcs_client/global_state_accessor.cc +++ b/src/ray/gcs/gcs_client/global_state_accessor.cc @@ -127,7 +127,7 @@ std::unique_ptr GlobalStateAccessor::GetObjectInfo( return object_info; } -std::string GlobalStateAccessor::GetNodeResourceInfo(const ClientID &node_id) { +std::string GlobalStateAccessor::GetNodeResourceInfo(const NodeID &node_id) { rpc::ResourceMap node_resource_map; std::promise promise; auto on_done = diff --git a/src/ray/gcs/gcs_client/global_state_accessor.h b/src/ray/gcs/gcs_client/global_state_accessor.h index 002628ccd..1786e0877 100644 --- a/src/ray/gcs/gcs_client/global_state_accessor.h +++ b/src/ray/gcs/gcs_client/global_state_accessor.h @@ -84,7 +84,7 @@ class GlobalStateAccessor { /// \return node resource map info. To support multi-language, we serialize each /// ResourceTableData and return the serialized string. Where used, it needs to be /// deserialized with protobuf function. 
- std::string GetNodeResourceInfo(const ClientID &node_id); + std::string GetNodeResourceInfo(const NodeID &node_id); /// Get internal config from GCS Service. /// diff --git a/src/ray/gcs/gcs_client/service_based_accessor.cc b/src/ray/gcs/gcs_client/service_based_accessor.cc index a0123f136..d1f77cf02 100644 --- a/src/ray/gcs/gcs_client/service_based_accessor.cc +++ b/src/ray/gcs/gcs_client/service_based_accessor.cc @@ -457,7 +457,7 @@ ServiceBasedNodeInfoAccessor::ServiceBasedNodeInfoAccessor( : client_impl_(client_impl) {} Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info) { - auto node_id = ClientID::FromBinary(local_node_info.node_id()); + auto node_id = NodeID::FromBinary(local_node_info.node_id()); RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id << ", address is = " << local_node_info.node_manager_address(); RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected."; @@ -472,7 +472,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_ const Status &status, const rpc::RegisterNodeReply &reply) { if (status.ok()) { local_node_info_.CopyFrom(local_node_info); - local_node_id_ = ClientID::FromBinary(local_node_info.node_id()); + local_node_id_ = NodeID::FromBinary(local_node_info.node_id()); } RAY_LOG(DEBUG) << "Finished registering node info, status = " << status << ", node id = " << node_id; @@ -486,7 +486,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() { RAY_CHECK(!local_node_id_.IsNil()) << "This node is disconnected."; - ClientID node_id = ClientID::FromBinary(local_node_info_.node_id()); + NodeID node_id = NodeID::FromBinary(local_node_info_.node_id()); RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id; rpc::UnregisterNodeRequest request; request.set_node_id(local_node_info_.node_id()); @@ -495,7 +495,7 @@ Status 
ServiceBasedNodeInfoAccessor::UnregisterSelf() { [this, node_id](const Status &status, const rpc::UnregisterNodeReply &reply) { if (status.ok()) { local_node_info_.set_state(GcsNodeInfo::DEAD); - local_node_id_ = ClientID::Nil(); + local_node_id_ = NodeID::Nil(); } RAY_LOG(INFO) << "Finished unregistering node info, status = " << status << ", node id = " << node_id; @@ -503,7 +503,7 @@ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() { return Status::OK(); } -const ClientID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; } +const NodeID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; } const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const { return local_node_info_; @@ -511,7 +511,7 @@ const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const { Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_info, const StatusCallback &callback) { - ClientID node_id = ClientID::FromBinary(node_info.node_id()); + NodeID node_id = NodeID::FromBinary(node_info.node_id()); RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id; rpc::RegisterNodeRequest request; request.mutable_node_info()->CopyFrom(node_info); @@ -527,7 +527,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_ return Status::OK(); } -Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const ClientID &node_id, +Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) { RAY_LOG(DEBUG) << "Unregistering node info, node id = " << node_id; rpc::UnregisterNodeRequest request; @@ -563,8 +563,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetAll( } Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, - const StatusCallback &done) { + const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); RAY_CHECK(node_change_callback_ == 
nullptr); node_change_callback_ = subscribe; @@ -597,7 +596,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange( } boost::optional ServiceBasedNodeInfoAccessor::Get( - const ClientID &node_id) const { + const NodeID &node_id) const { RAY_CHECK(!node_id.IsNil()); auto entry = node_cache_.find(node_id); if (entry != node_cache_.end()) { @@ -606,17 +605,17 @@ boost::optional ServiceBasedNodeInfoAccessor::Get( return boost::none; } -const std::unordered_map &ServiceBasedNodeInfoAccessor::GetAll() +const std::unordered_map &ServiceBasedNodeInfoAccessor::GetAll() const { return node_cache_; } -bool ServiceBasedNodeInfoAccessor::IsRemoved(const ClientID &node_id) const { +bool ServiceBasedNodeInfoAccessor::IsRemoved(const NodeID &node_id) const { return removed_nodes_.count(node_id) == 1; } Status ServiceBasedNodeInfoAccessor::AsyncGetResources( - const ClientID &node_id, const OptionalItemCallback &callback) { + const NodeID &node_id, const OptionalItemCallback &callback) { RAY_LOG(DEBUG) << "Getting node resources, node id = " << node_id; rpc::GetResourcesRequest request; request.set_node_id(node_id.Binary()); @@ -636,8 +635,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetResources( } Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources( - const ClientID &node_id, const ResourceMap &resources, - const StatusCallback &callback) { + const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) { RAY_LOG(DEBUG) << "Updating node resources, node id = " << node_id; rpc::UpdateResourcesRequest request; request.set_node_id(node_id.Binary()); @@ -664,7 +662,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources( } Status ServiceBasedNodeInfoAccessor::AsyncDeleteResources( - const ClientID &node_id, const std::vector &resource_names, + const NodeID &node_id, const std::vector &resource_names, const StatusCallback &callback) { RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id; rpc::DeleteResourcesRequest 
request; @@ -732,7 +730,7 @@ void ServiceBasedNodeInfoAccessor::AsyncReReportHeartbeat() { } Status ServiceBasedNodeInfoAccessor::AsyncSubscribeHeartbeat( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) { const std::string error_msg = "Unsupported method of AsyncSubscribeHeartbeat in ServiceBasedNodeInfoAccessor."; @@ -766,7 +764,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeBatchHeartbeat( } void ServiceBasedNodeInfoAccessor::HandleNotification(const GcsNodeInfo &node_info) { - ClientID node_id = ClientID::FromBinary(node_info.node_id()); + NodeID node_id = NodeID::FromBinary(node_info.node_id()); bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE); auto entry = node_cache_.find(node_id); bool is_notif_new; @@ -974,7 +972,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) { Status ServiceBasedTaskInfoAccessor::AsyncAddTaskLease( const std::shared_ptr &data_ptr, const StatusCallback &callback) { TaskID task_id = TaskID::FromBinary(data_ptr->task_id()); - ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id()); + NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id()); RAY_LOG(DEBUG) << "Adding task lease, task id = " << task_id << ", node id = " << node_id; rpc::AddTaskLeaseRequest request; @@ -1061,7 +1059,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &tas Status ServiceBasedTaskInfoAccessor::AttemptTaskReconstruction( const std::shared_ptr &data_ptr, const StatusCallback &callback) { - ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id()); + NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id()); RAY_LOG(DEBUG) << "Reconstructing task, reconstructions num = " << data_ptr->num_reconstructions() << ", node id = " << node_id; rpc::AttemptTaskReconstructionRequest request; @@ -1151,7 +1149,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncGetAll( } Status 
ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id, - const ClientID &node_id, + const NodeID &node_id, const StatusCallback &callback) { RAY_LOG(DEBUG) << "Adding object location, object id = " << object_id << ", node id = " << node_id; @@ -1179,7 +1177,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_i } Status ServiceBasedObjectInfoAccessor::AsyncRemoveLocation( - const ObjectID &object_id, const ClientID &node_id, const StatusCallback &callback) { + const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) { RAY_LOG(DEBUG) << "Removing object location, object id = " << object_id << ", node id = " << node_id; rpc::RemoveObjectLocationRequest request; @@ -1299,7 +1297,7 @@ ServiceBasedStatsInfoAccessor::ServiceBasedStatsInfoAccessor( Status ServiceBasedStatsInfoAccessor::AsyncAddProfileData( const std::shared_ptr &data_ptr, const StatusCallback &callback) { - ClientID node_id = ClientID::FromBinary(data_ptr->component_id()); + NodeID node_id = NodeID::FromBinary(data_ptr->component_id()); RAY_LOG(DEBUG) << "Adding profile data, component type = " << data_ptr->component_type() << ", node id = " << node_id; rpc::AddProfileDataRequest request; diff --git a/src/ray/gcs/gcs_client/service_based_accessor.h b/src/ray/gcs/gcs_client/service_based_accessor.h index 6f3289c93..05d5b2788 100644 --- a/src/ray/gcs/gcs_client/service_based_accessor.h +++ b/src/ray/gcs/gcs_client/service_based_accessor.h @@ -156,35 +156,34 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor { Status UnregisterSelf() override; - const ClientID &GetSelfId() const override; + const NodeID &GetSelfId() const override; const GcsNodeInfo &GetSelfInfo() const override; Status AsyncRegister(const rpc::GcsNodeInfo &node_info, const StatusCallback &callback) override; - Status AsyncUnregister(const ClientID &node_id, - const StatusCallback &callback) override; + Status AsyncUnregister(const NodeID &node_id, 
const StatusCallback &callback) override; Status AsyncGetAll(const MultiItemCallback &callback) override; Status AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) override; - boost::optional Get(const ClientID &node_id) const override; + boost::optional Get(const NodeID &node_id) const override; - const std::unordered_map &GetAll() const override; + const std::unordered_map &GetAll() const override; - bool IsRemoved(const ClientID &node_id) const override; + bool IsRemoved(const NodeID &node_id) const override; - Status AsyncGetResources(const ClientID &node_id, + Status AsyncGetResources(const NodeID &node_id, const OptionalItemCallback &callback) override; - Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources, + Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) override; - Status AsyncDeleteResources(const ClientID &node_id, + Status AsyncDeleteResources(const NodeID &node_id, const std::vector &resource_names, const StatusCallback &callback) override; @@ -197,7 +196,7 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor { void AsyncReReportHeartbeat() override; Status AsyncSubscribeHeartbeat( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) override; Status AsyncReportBatchHeartbeat( @@ -240,20 +239,20 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor { ServiceBasedGcsClient *client_impl_; using NodeChangeCallback = - std::function; + std::function; GcsNodeInfo local_node_info_; - ClientID local_node_id_; + NodeID local_node_id_; - Sequencer sequencer_; + Sequencer sequencer_; /// The callback to call when a new node is added or a node is removed. NodeChangeCallback node_change_callback_{nullptr}; /// A cache for information about all nodes. 
- std::unordered_map node_cache_; + std::unordered_map node_cache_; /// The set of removed nodes. - std::unordered_set removed_nodes_; + std::unordered_set removed_nodes_; }; /// \class ServiceBasedTaskInfoAccessor @@ -329,10 +328,10 @@ class ServiceBasedObjectInfoAccessor : public ObjectInfoAccessor { Status AsyncGetAll(const MultiItemCallback &callback) override; - Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id, + Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) override; - Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id, + Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) override; Status AsyncSubscribeToLocations( diff --git a/src/ray/gcs/gcs_client/test/global_state_accessor_test.cc b/src/ray/gcs/gcs_client/test/global_state_accessor_test.cc index f86c9e187..f14e2675e 100644 --- a/src/ray/gcs/gcs_client/test/global_state_accessor_test.cc +++ b/src/ray/gcs/gcs_client/test/global_state_accessor_test.cc @@ -139,7 +139,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) { for (int index = 0; index < node_count; ++index) { auto node_table_data = Mocker::GenNodeInfo(index, std::string("127.0.0.") + std::to_string(index)); - auto node_id = ClientID::FromBinary(node_table_data->node_id()); + auto node_id = NodeID::FromBinary(node_table_data->node_id()); std::promise promise; RAY_CHECK_OK(gcs_client_->Nodes().AsyncRegister( *node_table_data, [&promise](Status status) { promise.set_value(status.ok()); })); @@ -158,7 +158,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) { rpc::GcsNodeInfo node_data; node_data.ParseFromString(node_table[index]); auto resource_map_str = - global_state_->GetNodeResourceInfo(ClientID::FromBinary(node_data.node_id())); + global_state_->GetNodeResourceInfo(NodeID::FromBinary(node_data.node_id())); rpc::ResourceMap resource_map; 
resource_map.ParseFromString(resource_map_str); ASSERT_EQ( @@ -195,7 +195,7 @@ TEST_F(GlobalStateAccessorTest, TestProfileTable) { int profile_count = RayConfig::instance().maximum_profile_table_rows_count() + 1; ASSERT_EQ(global_state_->GetAllProfileInfo().size(), 0); for (int index = 0; index < profile_count; ++index) { - auto client_id = ClientID::FromRandom(); + auto client_id = NodeID::FromRandom(); auto profile_table_data = Mocker::GenProfileTableData(client_id); std::promise promise; RAY_CHECK_OK(gcs_client_->Stats().AsyncAddProfileData( @@ -215,7 +215,7 @@ TEST_F(GlobalStateAccessorTest, TestObjectTable) { for (int index = 0; index < object_count; ++index) { ObjectID object_id = ObjectID::FromRandom(); object_ids.emplace_back(object_id); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); std::promise promise; RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation( object_id, node_id, diff --git a/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc b/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc index 64f044f4c..310361ad7 100644 --- a/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc +++ b/src/ray/gcs/gcs_client/test/service_based_gcs_client_test.cc @@ -221,7 +221,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { } bool SubscribeToNodeChange( - const gcs::SubscribeCallback &subscribe) { + const gcs::SubscribeCallback &subscribe) { std::promise promise; RAY_CHECK_OK(gcs_client_->Nodes().AsyncSubscribeToNodeChange( subscribe, [&promise](Status status) { promise.set_value(status.ok()); })); @@ -258,7 +258,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { return nodes; } - bool UnregisterNode(const ClientID &node_id) { + bool UnregisterNode(const NodeID &node_id) { std::promise promise; RAY_CHECK_OK(gcs_client_->Nodes().AsyncUnregister( node_id, [&promise](Status status) { promise.set_value(status.ok()); })); @@ -272,7 +272,7 @@ class ServiceBasedGcsClientTest : 
public ::testing::Test { return WaitReady(promise.get_future(), timeout_ms_); } - gcs::NodeInfoAccessor::ResourceMap GetResources(const ClientID &node_id) { + gcs::NodeInfoAccessor::ResourceMap GetResources(const NodeID &node_id) { gcs::NodeInfoAccessor::ResourceMap resource_map; std::promise promise; RAY_CHECK_OK(gcs_client_->Nodes().AsyncGetResources( @@ -288,7 +288,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { return resource_map; } - bool UpdateResources(const ClientID &node_id, const std::string &key) { + bool UpdateResources(const NodeID &node_id, const std::string &key) { std::promise promise; gcs::NodeInfoAccessor::ResourceMap resource_map; auto resource = std::make_shared(); @@ -300,7 +300,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { return WaitReady(promise.get_future(), timeout_ms_); } - bool DeleteResources(const ClientID &node_id, + bool DeleteResources(const NodeID &node_id, const std::vector &resource_names) { std::promise promise; RAY_CHECK_OK(gcs_client_->Nodes().AsyncDeleteResources( @@ -415,7 +415,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { RAY_CHECK_OK(gcs_client_->Objects().AsyncUnsubscribeToLocations(object_id)); } - bool AddLocation(const ObjectID &object_id, const ClientID &node_id) { + bool AddLocation(const ObjectID &object_id, const NodeID &node_id) { std::promise promise; RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation( object_id, node_id, @@ -423,7 +423,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test { return WaitReady(promise.get_future(), timeout_ms_); } - bool RemoveLocation(const ObjectID &object_id, const ClientID &node_id) { + bool RemoveLocation(const ObjectID &object_id, const NodeID &node_id) { std::promise promise; RAY_CHECK_OK(gcs_client_->Objects().AsyncRemoveLocation( object_id, node_id, @@ -601,12 +601,12 @@ TEST_F(ServiceBasedGcsClientTest, TestActorSubscribeAll) { TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) { // Create gcs node info. 
auto gcs_node1_info = Mocker::GenNodeInfo(); - ClientID node1_id = ClientID::FromBinary(gcs_node1_info->node_id()); + NodeID node1_id = NodeID::FromBinary(gcs_node1_info->node_id()); // Subscribe to node addition and removal events from GCS. std::atomic register_count(0); std::atomic unregister_count(0); - auto on_subscribe = [&register_count, &unregister_count](const ClientID &node_id, + auto on_subscribe = [&register_count, &unregister_count](const NodeID &node_id, const rpc::GcsNodeInfo &data) { if (data.state() == rpc::GcsNodeInfo::ALIVE) { ++register_count; @@ -625,7 +625,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) { // Register a node to GCS. auto gcs_node2_info = Mocker::GenNodeInfo(); - ClientID node2_id = ClientID::FromBinary(gcs_node2_info->node_id()); + NodeID node2_id = NodeID::FromBinary(gcs_node2_info->node_id()); ASSERT_TRUE(RegisterNode(*gcs_node2_info)); WaitForExpectedCount(register_count, 2); @@ -671,7 +671,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeResources) { RAY_CHECK(RegisterNode(*node_info)); // Update resources of node in GCS. - ClientID node_id = ClientID::FromBinary(node_info->node_id()); + NodeID node_id = NodeID::FromBinary(node_info->node_id()); std::string key = "CPU"; ASSERT_TRUE(UpdateResources(node_id, key)); WaitForExpectedCount(add_count, 1); @@ -697,7 +697,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeHeartbeat) { RAY_CHECK(RegisterNode(*node_info)); // Report heartbeat of a node to GCS. - ClientID node_id = ClientID::FromBinary(node_info->node_id()); + NodeID node_id = NodeID::FromBinary(node_info->node_id()); auto heartbeat = std::make_shared(); heartbeat->set_client_id(node_id.Binary()); // Set this flag because GCS won't publish unchanged heartbeat. @@ -747,7 +747,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) { ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe)); // Add a task lease to GCS. 
- ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary()); ASSERT_TRUE(AddTaskLease(task_lease)); WaitForExpectedCount(task_lease_count, 2); @@ -771,7 +771,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) { TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) { ObjectID object_id = ObjectID::FromRandom(); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); // Subscribe to any update of an object's location. std::atomic object_add_count(0); @@ -816,7 +816,7 @@ TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) { TEST_F(ServiceBasedGcsClientTest, TestStats) { // Add profile data to GCS. - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); auto profile_table_data = Mocker::GenProfileTableData(node_id); ASSERT_TRUE(AddProfileData(profile_table_data)); } @@ -938,7 +938,7 @@ TEST_F(ServiceBasedGcsClientTest, TestActorTableResubscribe) { TEST_F(ServiceBasedGcsClientTest, TestObjectTableResubscribe) { ObjectID object1_id = ObjectID::FromRandom(); ObjectID object2_id = ObjectID::FromRandom(); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); // Subscribe to any update of an object's location. std::atomic object1_change_count(0); @@ -985,7 +985,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) { // Test that subscription of the node table can still work when GCS server restarts. // Subscribe to node addition and removal events from GCS and cache those information. 
std::atomic node_change_count(0); - auto node_subscribe = [&node_change_count](const ClientID &id, + auto node_subscribe = [&node_change_count](const NodeID &id, const rpc::GcsNodeInfo &result) { ++node_change_count; }; @@ -1009,7 +1009,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) { auto node_info = Mocker::GenNodeInfo(1); ASSERT_TRUE(RegisterNode(*node_info)); - ClientID node_id = ClientID::FromBinary(node_info->node_id()); + NodeID node_id = NodeID::FromBinary(node_info->node_id()); std::string key = "CPU"; ASSERT_TRUE(UpdateResources(node_id, key)); auto heartbeat = std::make_shared(); @@ -1023,7 +1023,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) { node_info = Mocker::GenNodeInfo(1); ASSERT_TRUE(RegisterNode(*node_info)); - node_id = ClientID::FromBinary(node_info->node_id()); + node_id = NodeID::FromBinary(node_info->node_id()); ASSERT_TRUE(UpdateResources(node_id, key)); heartbeat->set_client_id(node_info->node_id()); ASSERT_TRUE(ReportHeartbeat(heartbeat)); @@ -1056,7 +1056,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) { ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe)); ASSERT_TRUE(AddTask(task_table_data)); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary()); ASSERT_TRUE(AddTaskLease(task_lease)); WaitForExpectedCount(task_count, 1); @@ -1065,7 +1065,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) { RestartGcsServer(); - node_id = ClientID::FromRandom(); + node_id = NodeID::FromRandom(); task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary()); ASSERT_TRUE(AddTaskLease(task_lease)); WaitForExpectedCount(task_lease_count, 3); @@ -1096,7 +1096,7 @@ TEST_F(ServiceBasedGcsClientTest, TestWorkerTableResubscribe) { TEST_F(ServiceBasedGcsClientTest, TestGcsTableReload) { ObjectID object_id = ObjectID::FromRandom(); - ClientID node_id = 
ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); // Register node to GCS. auto node_info = Mocker::GenNodeInfo(); diff --git a/src/ray/gcs/gcs_server/gcs_actor_manager.cc b/src/ray/gcs/gcs_server/gcs_actor_manager.cc index 9bdd26b17..2e0ba773a 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_actor_manager.cc @@ -21,12 +21,12 @@ namespace ray { namespace gcs { -ClientID GcsActor::GetNodeID() const { +NodeID GcsActor::GetNodeID() const { const auto &raylet_id_binary = actor_table_data_.address().raylet_id(); if (raylet_id_binary.empty()) { - return ClientID::Nil(); + return NodeID::Nil(); } - return ClientID::FromBinary(raylet_id_binary); + return NodeID::FromBinary(raylet_id_binary); } void GcsActor::UpdateAddress(const rpc::Address &address) { @@ -47,8 +47,8 @@ WorkerID GcsActor::GetOwnerID() const { return WorkerID::FromBinary(GetOwnerAddress().worker_id()); } -ClientID GcsActor::GetOwnerNodeID() const { - return ClientID::FromBinary(GetOwnerAddress().raylet_id()); +NodeID GcsActor::GetOwnerNodeID() const { + return NodeID::FromBinary(GetOwnerAddress().raylet_id()); } const rpc::Address &GcsActor::GetOwnerAddress() const { @@ -425,7 +425,7 @@ Status GcsActorManager::RegisterActor(const ray::rpc::RegisterActorRequest &requ RAY_CHECK(registered_actors_.emplace(actor->GetActorID(), actor).second); const auto &owner_address = actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); auto worker_id = WorkerID::FromBinary(owner_address.worker_id()); RAY_CHECK(unresolved_actors_[node_id][worker_id].emplace(actor->GetActorID()).second); @@ -649,7 +649,7 @@ void GcsActorManager::DestroyActor(const ActorID &actor_id) { } absl::flat_hash_set GcsActorManager::GetUnresolvedActorsByOwnerNode( - const ClientID &node_id) const { + const NodeID &node_id) const { absl::flat_hash_set actor_ids; auto iter = 
unresolved_actors_.find(node_id); if (iter != unresolved_actors_.end()) { @@ -661,7 +661,7 @@ absl::flat_hash_set GcsActorManager::GetUnresolvedActorsByOwnerNode( } absl::flat_hash_set GcsActorManager::GetUnresolvedActorsByOwnerWorker( - const ClientID &node_id, const WorkerID &worker_id) const { + const NodeID &node_id, const WorkerID &worker_id) const { absl::flat_hash_set actor_ids; auto iter = unresolved_actors_.find(node_id); if (iter != unresolved_actors_.end()) { @@ -673,7 +673,7 @@ absl::flat_hash_set GcsActorManager::GetUnresolvedActorsByOwnerWorker( return actor_ids; } -void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id, +void GcsActorManager::OnWorkerDead(const ray::NodeID &node_id, const ray::WorkerID &worker_id, bool intentional_exit) { if (intentional_exit) { @@ -727,7 +727,7 @@ void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id, ReconstructActor(actor_id, /*need_reschedule=*/!intentional_exit); } -void GcsActorManager::OnNodeDead(const ClientID &node_id) { +void GcsActorManager::OnNodeDead(const NodeID &node_id) { RAY_LOG(WARNING) << "Node " << node_id << " failed, reconstructing actors."; const auto it = owners_.find(node_id); if (it != owners_.end()) { @@ -913,7 +913,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) { RAY_LOG(INFO) << "Loading initial data."; auto callback = [this, done](const std::unordered_map &result) { - std::unordered_map> node_to_workers; + std::unordered_map> node_to_workers; for (auto &item : result) { if (item.second.state() != ray::rpc::ActorTableData::DEAD) { auto actor = std::make_shared(item.second); @@ -925,7 +925,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) { if (item.second.state() == ray::rpc::ActorTableData::DEPENDENCIES_UNREADY) { const auto &owner = actor->GetOwnerAddress(); - const auto &owner_node = ClientID::FromBinary(owner.raylet_id()); + const auto &owner_node = NodeID::FromBinary(owner.raylet_id()); const auto &owner_worker = 
WorkerID::FromBinary(owner.worker_id()); RAY_CHECK(unresolved_actors_[owner_node][owner_worker] .emplace(actor->GetActorID()) @@ -1026,7 +1026,7 @@ void GcsActorManager::OnJobFinished(const JobID &job_id) { RAY_CHECK_OK(gcs_table_storage_->ActorTable().GetByJobId(job_id, on_done)); } -const absl::flat_hash_map> +const absl::flat_hash_map> &GcsActorManager::GetCreatedActors() const { return created_actors_; } @@ -1043,7 +1043,7 @@ const absl::flat_hash_map> void GcsActorManager::RemoveUnresolvedActor(const std::shared_ptr &actor) { const auto &owner_address = actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); auto worker_id = WorkerID::FromBinary(owner_address.worker_id()); auto iter = unresolved_actors_.find(node_id); if (iter != unresolved_actors_.end()) { diff --git a/src/ray/gcs/gcs_server/gcs_actor_manager.h b/src/ray/gcs/gcs_server/gcs_actor_manager.h index 289b23284..00a2bb7ec 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_manager.h +++ b/src/ray/gcs/gcs_server/gcs_actor_manager.h @@ -63,18 +63,18 @@ class GcsActor { actor_table_data_.set_state(rpc::ActorTableData::DEPENDENCIES_UNREADY); actor_table_data_.mutable_task_spec()->CopyFrom(task_spec); - actor_table_data_.mutable_address()->set_raylet_id(ClientID::Nil().Binary()); + actor_table_data_.mutable_address()->set_raylet_id(NodeID::Nil().Binary()); actor_table_data_.mutable_address()->set_worker_id(WorkerID::Nil().Binary()); } /// Get the node id on which this actor is created. - ClientID GetNodeID() const; + NodeID GetNodeID() const; /// Get the id of the worker on which this actor is created. WorkerID GetWorkerID() const; /// Get the actor's owner ID. WorkerID GetOwnerID() const; /// Get the node ID of the actor's owner. - ClientID GetOwnerNodeID() const; + NodeID GetOwnerNodeID() const; /// Get the address of the actor's owner. 
const rpc::Address &GetOwnerAddress() const; @@ -248,7 +248,7 @@ class GcsActorManager : public rpc::ActorInfoHandler { /// owned an actor, those actors will be destroyed. /// /// \param node_id The specified node id. - void OnNodeDead(const ClientID &node_id); + void OnNodeDead(const NodeID &node_id); /// Handle a worker failure. This will restart the associated actor, if any, /// which may be pending or already created. If the worker owned other @@ -258,7 +258,7 @@ class GcsActorManager : public rpc::ActorInfoHandler { /// \param worker_id ID of the dead worker. /// \param intentional_exit Whether the death was intentional. If yes and the /// worker was an actor, we should not attempt to restart the actor. - void OnWorkerDead(const ClientID &node_id, const WorkerID &worker_id, + void OnWorkerDead(const NodeID &node_id, const WorkerID &worker_id, bool intentional_exit = false); /// Handle actor creation task failure. This should be called when scheduling @@ -288,7 +288,7 @@ class GcsActorManager : public rpc::ActorInfoHandler { /// Get the created actors. /// /// \return The created actors. - const absl::flat_hash_map> + const absl::flat_hash_map> &GetCreatedActors() const; const absl::flat_hash_map> &GetRegisteredActors() @@ -323,11 +323,11 @@ class GcsActorManager : public rpc::ActorInfoHandler { /// Get unresolved actors that were submitted from the specified node. absl::flat_hash_set GetUnresolvedActorsByOwnerNode( - const ClientID &node_id) const; + const NodeID &node_id) const; /// Get unresolved actors that were submitted from the specified worker. absl::flat_hash_set GetUnresolvedActorsByOwnerWorker( - const ClientID &node_id, const WorkerID &worker_id) const; + const NodeID &node_id, const WorkerID &worker_id) const; private: /// Reconstruct the specified actor. @@ -372,18 +372,17 @@ class GcsActorManager : public rpc::ActorInfoHandler { /// Maps from worker ID to a client and the IDs of the actors owned by that worker. 
/// The actor whose dependencies are not resolved should be destroyed once it creator /// dies. - absl::flat_hash_map>> + absl::flat_hash_map>> unresolved_actors_; /// The pending actors which will not be scheduled until there's a resource change. std::vector> pending_actors_; /// Map contains the relationship of node and created actors. Each node ID /// maps to a map from worker ID to the actor created on that worker. - absl::flat_hash_map> created_actors_; + absl::flat_hash_map> created_actors_; /// Map from worker ID to a client and the IDs of the actors owned by that /// worker. An owned actor should be destroyed once it has gone out of scope, /// according to its owner, or the owner dies. - absl::flat_hash_map> owners_; + absl::flat_hash_map> owners_; /// The scheduler to schedule all registered actors. std::shared_ptr gcs_actor_scheduler_; diff --git a/src/ray/gcs/gcs_server/gcs_actor_scheduler.cc b/src/ray/gcs/gcs_server/gcs_actor_scheduler.cc index 5b82bc84c..140c77662 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_scheduler.cc +++ b/src/ray/gcs/gcs_server/gcs_actor_scheduler.cc @@ -89,7 +89,7 @@ void GcsActorScheduler::Reschedule(std::shared_ptr actor) { } } -std::vector GcsActorScheduler::CancelOnNode(const ClientID &node_id) { +std::vector GcsActorScheduler::CancelOnNode(const NodeID &node_id) { // Remove all the actors from the map associated with this node, and return them as they // will be reconstructed later. std::vector actor_ids; @@ -124,8 +124,7 @@ std::vector GcsActorScheduler::CancelOnNode(const ClientID &node_id) { return actor_ids; } -void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id, - const ActorID &actor_id) { +void GcsActorScheduler::CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) { // NOTE: This method does not currently cancel the outstanding lease request. // It only removes leasing information from the internal state so that // RequestWorkerLease ignores the response from raylet. 
@@ -134,7 +133,7 @@ void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id, node_it->second.erase(actor_id); } -ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id, +ActorID GcsActorScheduler::CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) { // Remove the worker from creating map and return ID of the actor associated with the // removed worker if exist, else return NilID. @@ -156,7 +155,7 @@ ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id, } void GcsActorScheduler::ReleaseUnusedWorkers( - const std::unordered_map> &node_to_workers) { + const std::unordered_map> &node_to_workers) { // The purpose of this function is to release leased workers that may be leaked. // When GCS restarts, it doesn't know which workers it has leased in the previous // lifecycle. In this case, GCS will send a list of worker ids that are still needed. @@ -192,7 +191,7 @@ void GcsActorScheduler::LeaseWorkerFromNode(std::shared_ptr actor, std::shared_ptr node) { RAY_CHECK(actor && node); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); RAY_LOG(INFO) << "Start leasing worker from node " << node_id << " for actor " << actor->GetActorID(); @@ -277,7 +276,7 @@ void GcsActorScheduler::HandleWorkerLeasedReply( // The worker did not succeed in the lease, but the specified node returned a new // node, and then try again on the new node. 
RAY_CHECK(!retry_at_raylet_address.raylet_id().empty()); - auto spill_back_node_id = ClientID::FromBinary(retry_at_raylet_address.raylet_id()); + auto spill_back_node_id = NodeID::FromBinary(retry_at_raylet_address.raylet_id()); if (auto spill_back_node = gcs_node_manager_.GetNode(spill_back_node_id)) { actor->UpdateAddress(retry_at_raylet_address); RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()] @@ -410,7 +409,7 @@ std::shared_ptr GcsActorScheduler::SelectNodeRandomly() const std::shared_ptr GcsActorScheduler::GetOrConnectLeaseClient( const rpc::Address &raylet_address) { - auto node_id = ClientID::FromBinary(raylet_address.raylet_id()); + auto node_id = NodeID::FromBinary(raylet_address.raylet_id()); auto iter = remote_lease_clients_.find(node_id); if (iter == remote_lease_clients_.end()) { auto lease_client = lease_client_factory_(raylet_address); diff --git a/src/ray/gcs/gcs_server/gcs_actor_scheduler.h b/src/ray/gcs/gcs_server/gcs_actor_scheduler.h index dc95048e4..99456adec 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_scheduler.h +++ b/src/ray/gcs/gcs_server/gcs_actor_scheduler.h @@ -54,26 +54,26 @@ class GcsActorSchedulerInterface { /// /// \param node_id ID of the node where the worker is located. /// \return ID list of actors associated with the specified node id. - virtual std::vector CancelOnNode(const ClientID &node_id) = 0; + virtual std::vector CancelOnNode(const NodeID &node_id) = 0; /// Cancel a outstanding leasing request to raylets. /// /// \param node_id ID of the node where the actor leasing request has been sent. /// \param actor_id ID of an actor. - virtual void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) = 0; + virtual void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) = 0; /// Cancel the actor that is being scheduled to the specified worker. /// /// \param node_id ID of the node where the worker is located. /// \param worker_id ID of the worker that the actor is creating on. 
/// \return ID of actor associated with the specified node id and worker id. - virtual ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) = 0; + virtual ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) = 0; /// Notify raylets to release unused workers. /// /// \param node_to_workers Workers used by each node. virtual void ReleaseUnusedWorkers( - const std::unordered_map> &node_to_workers) = 0; + const std::unordered_map> &node_to_workers) = 0; virtual ~GcsActorSchedulerInterface() {} }; @@ -120,7 +120,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface { /// /// \param node_id ID of the node where the worker is located. /// \return ID list of actors associated with the specified node id. - std::vector CancelOnNode(const ClientID &node_id) override; + std::vector CancelOnNode(const NodeID &node_id) override; /// Cancel a outstanding leasing request to raylets. /// @@ -130,20 +130,20 @@ class GcsActorScheduler : public GcsActorSchedulerInterface { /// /// \param node_id ID of the node where the actor leasing request has been sent. /// \param actor_id ID of an actor. - void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) override; + void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) override; /// Cancel the actor that is being scheduled to the specified worker. /// /// \param node_id ID of the node where the worker is located. /// \param worker_id ID of the worker that the actor is creating on. /// \return ID of actor associated with the specified node id and worker id. - ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) override; + ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) override; /// Notify raylets to release unused workers. /// /// \param node_to_workers Workers used by each node. 
- void ReleaseUnusedWorkers(const std::unordered_map> - &node_to_workers) override; + void ReleaseUnusedWorkers( + const std::unordered_map> &node_to_workers) override; protected: /// The GcsLeasedWorker is kind of abstraction of remote leased worker inside raylet. It @@ -178,7 +178,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface { WorkerID GetWorkerID() const { return WorkerID::FromBinary(address_.worker_id()); } /// Get the NodeID of this leased worker. - ClientID GetNodeID() const { return ClientID::FromBinary(address_.raylet_id()); } + NodeID GetNodeID() const { return NodeID::FromBinary(address_.raylet_id()); } /// Get the id of the actor which is assigned to this leased worker. ActorID GetAssignedActorID() const { return assigned_actor_id_; } @@ -269,15 +269,14 @@ class GcsActorScheduler : public GcsActorSchedulerInterface { /// Map from node ID to the set of actors for whom we are trying to acquire a lease from /// that node. This is needed so that we can retry lease requests from the node until we /// receive a reply or the node is removed. - absl::flat_hash_map> - node_to_actors_when_leasing_; + absl::flat_hash_map> node_to_actors_when_leasing_; /// Map from node ID to the workers on which we are trying to create actors. This is /// needed so that we can cancel actor creation requests if the worker is removed. - absl::flat_hash_map>> node_to_workers_when_creating_; /// The cached node clients which are used to communicate with raylet to lease workers. - absl::flat_hash_map> + absl::flat_hash_map> remote_lease_clients_; /// Reference of GcsNodeManager. const GcsNodeManager &gcs_node_manager_; @@ -290,7 +289,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface { /// Factory for producing new clients to request leases from remote nodes. LeaseClientFactoryFn lease_client_factory_; /// The nodes which are releasing unused workers. 
- absl::flat_hash_set nodes_of_releasing_unused_workers_; + absl::flat_hash_set nodes_of_releasing_unused_workers_; /// The cached core worker clients which are used to communicate with leased worker. rpc::CoreWorkerClientPool core_worker_clients_; }; diff --git a/src/ray/gcs/gcs_server/gcs_node_manager.cc b/src/ray/gcs/gcs_server/gcs_node_manager.cc index c9ee5062c..54453343c 100644 --- a/src/ray/gcs/gcs_server/gcs_node_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_node_manager.cc @@ -25,7 +25,7 @@ GcsNodeManager::NodeFailureDetector::NodeFailureDetector( boost::asio::io_service &io_service, std::shared_ptr gcs_table_storage, std::shared_ptr gcs_pub_sub, - std::function on_node_death_callback) + std::function on_node_death_callback) : gcs_table_storage_(std::move(gcs_table_storage)), on_node_death_callback_(std::move(on_node_death_callback)), num_heartbeats_timeout_(RayConfig::instance().num_heartbeats_timeout()), @@ -40,12 +40,12 @@ void GcsNodeManager::NodeFailureDetector::Start() { } } -void GcsNodeManager::NodeFailureDetector::AddNode(const ray::ClientID &node_id) { +void GcsNodeManager::NodeFailureDetector::AddNode(const ray::NodeID &node_id) { heartbeats_.emplace(node_id, num_heartbeats_timeout_); } void GcsNodeManager::NodeFailureDetector::HandleHeartbeat( - const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat_data) { + const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat_data) { auto iter = heartbeats_.find(node_id); if (iter == heartbeats_.end()) { // Ignore this heartbeat as the node is not registered. @@ -145,7 +145,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service, : main_io_service_(main_io_service), node_failure_detector_(new NodeFailureDetector( node_failure_detector_io_service, gcs_table_storage, gcs_pub_sub, - [this](const ClientID &node_id) { + [this](const NodeID &node_id) { // Post this to main event loop to avoid potential concurrency issues. 
main_io_service_.post([this, node_id] { if (auto node = RemoveNode(node_id, /* is_intended = */ false)) { @@ -171,7 +171,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service, void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request, rpc::RegisterNodeReply *reply, rpc::SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.node_info().node_id()); + NodeID node_id = NodeID::FromBinary(request.node_info().node_id()); RAY_LOG(INFO) << "Registering node info, node id = " << node_id << ", address = " << request.node_info().node_manager_address(); AddNode(std::make_shared(request.node_info())); @@ -191,7 +191,7 @@ void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request, void GcsNodeManager::HandleUnregisterNode(const rpc::UnregisterNodeRequest &request, rpc::UnregisterNodeReply *reply, rpc::SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id; if (auto node = RemoveNode(node_id, /* is_intended = */ true)) { node->set_state(rpc::GcsNodeInfo::DEAD); @@ -228,7 +228,7 @@ void GcsNodeManager::HandleGetAllNodeInfo(const rpc::GetAllNodeInfoRequest &requ void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &request, rpc::ReportHeartbeatReply *reply, rpc::SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.heartbeat().client_id()); + NodeID node_id = NodeID::FromBinary(request.heartbeat().client_id()); auto heartbeat_data = std::make_shared(); heartbeat_data->CopyFrom(request.heartbeat()); @@ -248,7 +248,7 @@ void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &re void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request, rpc::GetResourcesReply *reply, rpc::SendReplyCallback send_reply_callback) { 
- ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); auto iter = cluster_resources_.find(node_id); if (iter != cluster_resources_.end()) { for (auto &resource : iter->second.items()) { @@ -261,7 +261,7 @@ void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request, void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &request, rpc::UpdateResourcesReply *reply, rpc::SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); RAY_LOG(DEBUG) << "Updating resources, node id = " << node_id; auto iter = cluster_resources_.find(node_id); auto to_be_updated_resources = request.resources(); @@ -298,7 +298,7 @@ void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &re void GcsNodeManager::HandleDeleteResources(const rpc::DeleteResourcesRequest &request, rpc::DeleteResourcesReply *reply, rpc::SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id; auto resource_names = VectorFromProtobuf(request.resource_name_list()); auto iter = cluster_resources_.find(node_id); @@ -355,7 +355,7 @@ void GcsNodeManager::HandleGetInternalConfig(const rpc::GetInternalConfigRequest } std::shared_ptr GcsNodeManager::GetNode( - const ray::ClientID &node_id) const { + const ray::NodeID &node_id) const { auto iter = alive_nodes_.find(node_id); if (iter == alive_nodes_.end()) { return nullptr; @@ -365,7 +365,7 @@ std::shared_ptr GcsNodeManager::GetNode( } void GcsNodeManager::AddNode(std::shared_ptr node) { - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); auto iter = alive_nodes_.find(node_id); if (iter == alive_nodes_.end()) { 
alive_nodes_.emplace(node_id, node); @@ -385,7 +385,7 @@ void GcsNodeManager::AddNode(std::shared_ptr node) { } std::shared_ptr GcsNodeManager::RemoveNode( - const ray::ClientID &node_id, bool is_intended /*= false*/) { + const ray::NodeID &node_id, bool is_intended /*= false*/) { RAY_LOG(INFO) << "Removing node, node id = " << node_id; std::shared_ptr removed_node; auto iter = alive_nodes_.find(node_id); @@ -423,8 +423,8 @@ std::shared_ptr GcsNodeManager::RemoveNode( void GcsNodeManager::LoadInitialData(const EmptyCallback &done) { RAY_LOG(INFO) << "Loading initial data."; - auto get_node_callback = [this, done]( - const std::unordered_map &result) { + auto get_node_callback = [this, + done](const std::unordered_map &result) { for (auto &item : result) { if (item.second.state() == rpc::GcsNodeInfo::ALIVE) { // Call `AddNode` for this node to make sure it is tracked by the failure @@ -436,7 +436,7 @@ void GcsNodeManager::LoadInitialData(const EmptyCallback &done) { } auto get_node_resource_callback = - [this, done](const std::unordered_map &result) { + [this, done](const std::unordered_map &result) { for (auto &item : result) { if (alive_nodes_.count(item.first)) { cluster_resources_[item.first] = item.second; @@ -458,13 +458,13 @@ void GcsNodeManager::StartNodeFailureDetector() { } void GcsNodeManager::UpdateNodeRealtimeResources( - const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat) { + const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat) { auto resources_available = MapFromProtobuf(heartbeat.resources_available()); cluster_realtime_resources_[node_id] = std::make_shared(resources_available); } -const absl::flat_hash_map> +const absl::flat_hash_map> &GcsNodeManager::GetClusterRealtimeResources() const { return cluster_realtime_resources_; } diff --git a/src/ray/gcs/gcs_server/gcs_node_manager.h b/src/ray/gcs/gcs_server/gcs_node_manager.h index a818aa127..26a8b7904 100644 --- a/src/ray/gcs/gcs_server/gcs_node_manager.h +++ 
b/src/ray/gcs/gcs_server/gcs_node_manager.h @@ -99,20 +99,20 @@ class GcsNodeManager : public rpc::NodeInfoHandler { /// \param node_id The ID of the node to be removed. /// \param is_intended False if this is triggered by `node_failure_detector_`, else /// True. - std::shared_ptr RemoveNode(const ClientID &node_id, + std::shared_ptr RemoveNode(const NodeID &node_id, bool is_intended = false); /// Get alive node by ID. /// /// \param node_id The id of the node. /// \return the node if it is alive else return nullptr. - std::shared_ptr GetNode(const ClientID &node_id) const; + std::shared_ptr GetNode(const NodeID &node_id) const; /// Get all alive nodes. /// /// \return all alive nodes. - const absl::flat_hash_map> - &GetAllAliveNodes() const { + const absl::flat_hash_map> &GetAllAliveNodes() + const { return alive_nodes_; } @@ -144,11 +144,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler { void StartNodeFailureDetector(); // Update node realtime resources. - void UpdateNodeRealtimeResources(const ClientID &node_id, + void UpdateNodeRealtimeResources(const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat); /// Get cluster realtime resources. - const absl::flat_hash_map> + const absl::flat_hash_map> &GetClusterRealtimeResources() const; protected: @@ -165,7 +165,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler { boost::asio::io_service &io_service, std::shared_ptr gcs_table_storage, std::shared_ptr gcs_pub_sub, - std::function on_node_death_callback); + std::function on_node_death_callback); // Note: To avoid heartbeats being delayed by main thread, all public methods below // should be posted to its own IO service. @@ -177,13 +177,13 @@ class GcsNodeManager : public rpc::NodeInfoHandler { /// Only if the node has registered, its heartbeat data will be accepted. /// /// \param node_id ID of the node to be registered. - void AddNode(const ClientID &node_id); + void AddNode(const NodeID &node_id); /// Handle a heartbeat from a Raylet. 
/// /// \param node_id The client ID of the Raylet that sent the heartbeat. /// \param heartbeat_data The heartbeat sent by the client. - void HandleHeartbeat(const ClientID &node_id, + void HandleHeartbeat(const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat_data); protected: @@ -206,7 +206,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler { /// Storage for GCS tables. std::shared_ptr gcs_table_storage_; /// The callback of node death. - std::function on_node_death_callback_; + std::function on_node_death_callback_; /// The number of heartbeats that can be missed before a node is removed. int64_t num_heartbeats_timeout_; // Only the changed part will be included in heartbeat if this is true. @@ -215,9 +215,9 @@ class GcsNodeManager : public rpc::NodeInfoHandler { boost::asio::deadline_timer detect_timer_; /// For each Raylet that we receive a heartbeat from, the number of ticks /// that may pass before the Raylet will be declared dead. - absl::flat_hash_map heartbeats_; + absl::flat_hash_map heartbeats_; /// A buffer containing heartbeats received from node managers in the last tick. - absl::flat_hash_map heartbeat_buffer_; + absl::flat_hash_map heartbeat_buffer_; /// A publisher for publishing gcs messages. std::shared_ptr gcs_pub_sub_; /// Is the detect started. @@ -232,11 +232,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler { /// The event loop for node failure detector. boost::asio::io_service &node_failure_detector_service_; /// Alive nodes. - absl::flat_hash_map> alive_nodes_; + absl::flat_hash_map> alive_nodes_; /// Dead nodes. - absl::flat_hash_map> dead_nodes_; + absl::flat_hash_map> dead_nodes_; /// Cluster resources. - absl::flat_hash_map cluster_resources_; + absl::flat_hash_map cluster_resources_; /// Listeners which monitors the addition of nodes. std::vector)>> node_added_listeners_; @@ -248,7 +248,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler { /// Storage for GCS tables. 
std::shared_ptr gcs_table_storage_; /// Cluster realtime resources. - absl::flat_hash_map> cluster_realtime_resources_; + absl::flat_hash_map> cluster_realtime_resources_; }; } // namespace gcs diff --git a/src/ray/gcs/gcs_server/gcs_object_manager.cc b/src/ray/gcs/gcs_server/gcs_object_manager.cc index 48142c29a..406180e4a 100644 --- a/src/ray/gcs/gcs_server/gcs_object_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_object_manager.cc @@ -60,7 +60,7 @@ void GcsObjectManager::HandleAddObjectLocation( const rpc::AddObjectLocationRequest &request, rpc::AddObjectLocationReply *reply, rpc::SendReplyCallback send_reply_callback) { ObjectID object_id = ObjectID::FromBinary(request.object_id()); - ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); RAY_LOG(DEBUG) << "Adding object location, job id = " << object_id.TaskId().JobId() << ", object id = " << object_id << ", node id = " << node_id; AddObjectLocationInCache(object_id, node_id); @@ -100,7 +100,7 @@ void GcsObjectManager::HandleRemoveObjectLocation( const rpc::RemoveObjectLocationRequest &request, rpc::RemoveObjectLocationReply *reply, rpc::SendReplyCallback send_reply_callback) { ObjectID object_id = ObjectID::FromBinary(request.object_id()); - ClientID node_id = ClientID::FromBinary(request.node_id()); + NodeID node_id = NodeID::FromBinary(request.node_id()); RAY_LOG(DEBUG) << "Removing object location, job id = " << object_id.TaskId().JobId() << ", object id = " << object_id << ", node id = " << node_id; RemoveObjectLocationInCache(object_id, node_id); @@ -143,7 +143,7 @@ void GcsObjectManager::HandleRemoveObjectLocation( } void GcsObjectManager::AddObjectsLocation( - const ClientID &node_id, const absl::flat_hash_set &object_ids) { + const NodeID &node_id, const absl::flat_hash_set &object_ids) { // TODO(micafan) Optimize the lock when necessary. // Maybe use read/write lock. Or reduce the granularity of the lock. 
absl::MutexLock lock(&mutex_); @@ -159,7 +159,7 @@ void GcsObjectManager::AddObjectsLocation( } void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id, - const ClientID &node_id) { + const NodeID &node_id) { absl::MutexLock lock(&mutex_); auto *objects_on_node = GetObjectSetByNode(node_id, /* create_if_not_exist */ true); @@ -170,7 +170,7 @@ void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id, object_locations->emplace(node_id); } -absl::flat_hash_set GcsObjectManager::GetObjectLocations( +absl::flat_hash_set GcsObjectManager::GetObjectLocations( const ObjectID &object_id) { absl::MutexLock lock(&mutex_); @@ -178,10 +178,10 @@ absl::flat_hash_set GcsObjectManager::GetObjectLocations( if (object_locations) { return *object_locations; } - return absl::flat_hash_set{}; + return absl::flat_hash_set{}; } -void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) { +void GcsObjectManager::OnNodeRemoved(const NodeID &node_id) { absl::MutexLock lock(&mutex_); ObjectSet objects_on_node; @@ -207,7 +207,7 @@ void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) { } void GcsObjectManager::RemoveObjectLocationInCache(const ObjectID &object_id, - const ClientID &node_id) { + const NodeID &node_id) { absl::MutexLock lock(&mutex_); auto *object_locations = GetObjectLocationSet(object_id); @@ -244,7 +244,7 @@ GcsObjectManager::LocationSet *GcsObjectManager::GetObjectLocationSet( } GcsObjectManager::ObjectSet *GcsObjectManager::GetObjectSetByNode( - const ClientID &node_id, bool create_if_not_exist) { + const NodeID &node_id, bool create_if_not_exist) { ObjectSet *objects_on_node = nullptr; auto it = node_to_objects_.find(node_id); @@ -271,11 +271,11 @@ void GcsObjectManager::LoadInitialData(const EmptyCallback &done) { RAY_LOG(INFO) << "Loading initial data."; auto callback = [this, done]( const std::unordered_map &result) { - absl::flat_hash_map node_to_objects; + absl::flat_hash_map node_to_objects; for (auto &item : result) { 
auto object_list = item.second; for (int index = 0; index < object_list.items_size(); ++index) { - node_to_objects[ClientID::FromBinary(object_list.items(index).manager())].insert( + node_to_objects[NodeID::FromBinary(object_list.items(index).manager())].insert( item.first); } } diff --git a/src/ray/gcs/gcs_server/gcs_object_manager.h b/src/ray/gcs/gcs_server/gcs_object_manager.h index f0e9c7a89..6555da1ba 100644 --- a/src/ray/gcs/gcs_server/gcs_object_manager.h +++ b/src/ray/gcs/gcs_server/gcs_object_manager.h @@ -33,7 +33,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { [this](const std::shared_ptr &node) { // All of the related actors should be reconstructed when a node is removed from // the GCS. - OnNodeRemoved(ClientID::FromBinary(node->node_id())); + OnNodeRemoved(NodeID::FromBinary(node->node_id())); }); } @@ -60,14 +60,14 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { void LoadInitialData(const EmptyCallback &done); protected: - typedef absl::flat_hash_set LocationSet; + typedef absl::flat_hash_set LocationSet; /// Add a location of objects. /// If the GCS server restarts, this function is used to reload data from storage. /// /// \param node_id The object location that will be added. /// \param object_ids The ids of objects which location will be added. - void AddObjectsLocation(const ClientID &node_id, + void AddObjectsLocation(const NodeID &node_id, const absl::flat_hash_set &object_ids) LOCKS_EXCLUDED(mutex_); @@ -75,7 +75,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { /// /// \param object_id The id of object. /// \param node_id The node id of the new location. - void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) + void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) LOCKS_EXCLUDED(mutex_); /// Get all locations of the given object. @@ -87,13 +87,13 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { /// Handler if a node is removed. 
/// /// \param node_id The node that will be removed. - void OnNodeRemoved(const ClientID &node_id) LOCKS_EXCLUDED(mutex_); + void OnNodeRemoved(const NodeID &node_id) LOCKS_EXCLUDED(mutex_); /// Remove object's location. /// /// \param object_id The id of the object which location will be removed. /// \param node_id The location that will be removed. - void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) + void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) LOCKS_EXCLUDED(mutex_); private: @@ -118,7 +118,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { /// \param node_id The id of node to lookup. /// \param create_if_not_exist Whether to create a new one if not exist. /// \return ObjectSet * - GcsObjectManager::ObjectSet *GetObjectSetByNode(const ClientID &node_id, + GcsObjectManager::ObjectSet *GetObjectSetByNode(const NodeID &node_id, bool create_if_not_exist = false) EXCLUSIVE_LOCKS_REQUIRED(mutex_); @@ -130,7 +130,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler { /// Mapping from node id to objects that held by the node. /// This is the local cache of nodes' objects in the storage. 
- absl::flat_hash_map node_to_objects_ GUARDED_BY(mutex_); + absl::flat_hash_map node_to_objects_ GUARDED_BY(mutex_); std::shared_ptr gcs_table_storage_; std::shared_ptr gcs_pub_sub_; diff --git a/src/ray/gcs/gcs_server/gcs_placement_group_manager.cc b/src/ray/gcs/gcs_server/gcs_placement_group_manager.cc index c4d16c993..846ed9cc1 100644 --- a/src/ray/gcs/gcs_server/gcs_placement_group_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_placement_group_manager.cc @@ -53,7 +53,7 @@ std::vector> GcsPlacementGroup::GetUnplaced const auto &bundles = placement_group_table_data_.bundles(); std::vector> unplaced_bundles; for (auto &bundle : bundles) { - if (ClientID::FromBinary(bundle.node_id()).IsNil()) { + if (NodeID::FromBinary(bundle.node_id()).IsNil()) { unplaced_bundles.push_back(std::make_shared(bundle)); } } @@ -326,7 +326,7 @@ void GcsPlacementGroupManager::RetryCreatingPlacementGroup() { RayConfig::instance().gcs_create_placement_group_retry_interval_ms()); } -void GcsPlacementGroupManager::OnNodeDead(const ClientID &node_id) { +void GcsPlacementGroupManager::OnNodeDead(const NodeID &node_id) { RAY_LOG(WARNING) << "Node " << node_id << " failed, rescheduling the placement groups on the dead node."; auto bundles = gcs_placement_group_scheduler_->GetBundlesOnNode(node_id); diff --git a/src/ray/gcs/gcs_server/gcs_placement_group_manager.h b/src/ray/gcs/gcs_server/gcs_placement_group_manager.h index ce21b8d01..b5e275739 100644 --- a/src/ray/gcs/gcs_server/gcs_placement_group_manager.h +++ b/src/ray/gcs/gcs_server/gcs_placement_group_manager.h @@ -167,7 +167,7 @@ class GcsPlacementGroupManager : public rpc::PlacementGroupInfoHandler { /// specified node id. /// /// \param node_id The specified node id. - void OnNodeDead(const ClientID &node_id); + void OnNodeDead(const NodeID &node_id); private: /// Try to create placement group after a short time. 
diff --git a/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.cc b/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.cc index 122dc877e..1cc7abfb2 100644 --- a/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.cc +++ b/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.cc @@ -46,7 +46,7 @@ ScheduleMap GcsStrictPackStrategy::Schedule( // Filter candidate nodes. const auto &alive_nodes = context->node_manager_.GetClusterRealtimeResources(); - std::vector> candidate_nodes; + std::vector> candidate_nodes; for (auto &node : alive_nodes) { if (required_resources.IsSubset(*node.second)) { candidate_nodes.emplace_back((*context->node_to_bundles_)[node.first], node.first); @@ -61,8 +61,8 @@ ScheduleMap GcsStrictPackStrategy::Schedule( std::sort( std::begin(candidate_nodes), std::end(candidate_nodes), - [](const std::pair &left, - const std::pair &right) { return left.first < right.first; }); + [](const std::pair &left, + const std::pair &right) { return left.first < right.first; }); for (auto &bundle : bundles) { schedule_map[bundle->BundleId()] = candidate_nodes.front().second; @@ -287,7 +287,7 @@ void GcsPlacementGroupScheduler::PrepareResources( const std::shared_ptr &bundle, const std::shared_ptr &node, const StatusCallback &callback) { const auto lease_client = GetLeaseClientFromNode(node); - const auto node_id = ClientID::FromBinary(node->node_id()); + const auto node_id = NodeID::FromBinary(node->node_id()); RAY_LOG(INFO) << "Preparing resource from node " << node_id << " for a bundle: " << bundle->DebugString(); lease_client->PrepareBundleResources( @@ -311,7 +311,7 @@ void GcsPlacementGroupScheduler::CommitResources( const std::shared_ptr &node, const StatusCallback callback) { RAY_CHECK(node != nullptr); const auto lease_client = GetLeaseClientFromNode(node); - const auto node_id = ClientID::FromBinary(node->node_id()); + const auto node_id = NodeID::FromBinary(node->node_id()); RAY_LOG(INFO) << "Committing resource to a node " << node_id << " for a 
bundle: " << bundle->DebugString(); lease_client->CommitBundleResources( @@ -339,7 +339,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve( << " has already removed. Cancellation request will be ignored."; return; } - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); RAY_LOG(INFO) << "Cancelling the resource reserved for bundle: " << bundle_spec->DebugString() << " at node " << node_id; const auto return_client = GetLeaseClientFromNode(node); @@ -353,7 +353,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve( std::shared_ptr GcsPlacementGroupScheduler::GetOrConnectLeaseClient(const rpc::Address &raylet_address) { - auto node_id = ClientID::FromBinary(raylet_address.raylet_id()); + auto node_id = NodeID::FromBinary(raylet_address.raylet_id()); auto iter = remote_lease_clients_.find(node_id); if (iter == remote_lease_clients_.end()) { auto lease_client = lease_client_factory_(raylet_address); @@ -499,7 +499,7 @@ std::unique_ptr GcsPlacementGroupScheduler::GetScheduleContext( auto &alive_nodes = gcs_node_manager_.GetAllAliveNodes(); committed_bundle_location_index_.AddNodes(alive_nodes); - auto node_to_bundles = std::make_shared>(); + auto node_to_bundles = std::make_shared>(); for (const auto &node_it : alive_nodes) { const auto &node_id = node_it.first; const auto &bundle_locations_on_node = @@ -517,7 +517,7 @@ std::unique_ptr GcsPlacementGroupScheduler::GetScheduleContext( } absl::flat_hash_map> -GcsPlacementGroupScheduler::GetBundlesOnNode(const ClientID &node_id) { +GcsPlacementGroupScheduler::GetBundlesOnNode(const NodeID &node_id) { absl::flat_hash_map> bundles_on_node; const auto &maybe_bundle_locations = committed_bundle_location_index_.GetBundleLocationsOnNode(node_id); @@ -546,7 +546,7 @@ void BundleLocationIndex::AddBundleLocations( } } -bool BundleLocationIndex::Erase(const ClientID &node_id) { +bool BundleLocationIndex::Erase(const NodeID &node_id) { const auto leased_bundles_it = 
node_to_leased_bundles_.find(node_id); if (leased_bundles_it == node_to_leased_bundles_.end()) { return false; @@ -604,7 +604,7 @@ BundleLocationIndex::GetBundleLocations(const PlacementGroupID &placement_group_ } const absl::optional const> -BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) { +BundleLocationIndex::GetBundleLocationsOnNode(const NodeID &node_id) { auto it = node_to_leased_bundles_.find(node_id); if (it == node_to_leased_bundles_.end()) { return {}; @@ -613,7 +613,7 @@ BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) { } void BundleLocationIndex::AddNodes( - const absl::flat_hash_map> &nodes) { + const absl::flat_hash_map> &nodes) { for (const auto &iter : nodes) { if (!node_to_leased_bundles_.contains(iter.first)) { node_to_leased_bundles_[iter.first] = std::make_shared(); @@ -630,13 +630,13 @@ LeaseStatusTracker::LeaseStatusTracker( } bool LeaseStatusTracker::MarkPreparePhaseStarted( - const ClientID &node_id, std::shared_ptr bundle) { + const NodeID &node_id, std::shared_ptr bundle) { const auto &bundle_id = bundle->BundleId(); return node_to_bundles_when_preparing_[node_id].emplace(bundle_id).second; } void LeaseStatusTracker::MarkPrepareRequestReturned( - const ClientID &node_id, const std::shared_ptr bundle, + const NodeID &node_id, const std::shared_ptr bundle, const Status &status) { RAY_CHECK(prepare_request_returned_count_ <= bundles_to_schedule_.size()); auto leasing_bundles = node_to_bundles_when_preparing_.find(node_id); @@ -670,7 +670,7 @@ bool LeaseStatusTracker::AllPrepareRequestsSuccessful() const { } void LeaseStatusTracker::MarkCommitRequestReturned( - const ClientID &node_id, const std::shared_ptr bundle, + const NodeID &node_id, const std::shared_ptr bundle, const Status &status) { commit_request_returned_count_ += 1; // If the request succeeds, record it. 
diff --git a/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.h b/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.h index be9b998ee..2e8598ee5 100644 --- a/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.h +++ b/src/ray/gcs/gcs_server/gcs_placement_group_scheduler.h @@ -37,9 +37,10 @@ struct pair_hash { return std::hash()(pair.first) ^ std::hash()(pair.second); } }; -using ScheduleMap = std::unordered_map; -using BundleLocations = absl::flat_hash_map< - BundleID, std::pair>, pair_hash>; +using ScheduleMap = std::unordered_map; +using BundleLocations = + absl::flat_hash_map>, + pair_hash>; class GcsPlacementGroup; @@ -60,7 +61,7 @@ class GcsPlacementGroupSchedulerInterface { /// \param node_id ID of the dead node. /// \return The bundles belong to the dead node. virtual absl::flat_hash_map> GetBundlesOnNode( - const ClientID &node_id) = 0; + const NodeID &node_id) = 0; /// Destroy bundle resources from all nodes in the placement group. virtual void DestroyPlacementGroupBundleResourcesIfExists( @@ -79,7 +80,7 @@ class GcsPlacementGroupSchedulerInterface { /// ScheduleContext provides information that are needed for bundle scheduling decision. class ScheduleContext { public: - ScheduleContext(std::shared_ptr> node_to_bundles, + ScheduleContext(std::shared_ptr> node_to_bundles, const absl::optional> bundle_locations, const GcsNodeManager &node_manager) : node_to_bundles_(std::move(node_to_bundles)), @@ -87,7 +88,7 @@ class ScheduleContext { node_manager_(node_manager) {} // Key is node id, value is the number of bundles on the node. - const std::shared_ptr> node_to_bundles_; + const std::shared_ptr> node_to_bundles_; // The locations of existing bundles for this placement group. const absl::optional> bundle_locations_; @@ -158,7 +159,7 @@ class LeaseStatusTracker { /// \param node_id Id of a node where prepare request is sent. /// \param bundle Bundle specification the node is supposed to prepare. 
/// \return False if the prepare phase was already started. True otherwise. - bool MarkPreparePhaseStarted(const ClientID &node_id, + bool MarkPreparePhaseStarted(const NodeID &node_id, std::shared_ptr bundle); /// Indicate the tracker that all prepare requests are returned. @@ -167,7 +168,7 @@ class LeaseStatusTracker { /// \param bundle Bundle specification the node was supposed to schedule. /// \param status Status of the prepare response. /// \param void - void MarkPrepareRequestReturned(const ClientID &node_id, + void MarkPrepareRequestReturned(const NodeID &node_id, std::shared_ptr bundle, const Status &status); @@ -186,7 +187,7 @@ class LeaseStatusTracker { /// \param node_id Id of a node where commit request is returned. /// \param bundle Bundle specification the node was supposed to schedule. /// \param status Status of the returned commit request. - void MarkCommitRequestReturned(const ClientID &node_id, + void MarkCommitRequestReturned(const NodeID &node_id, const std::shared_ptr bundle, const Status &status); @@ -245,7 +246,7 @@ class LeaseStatusTracker { /// Location of bundles that prepare requests were sent. /// If prepare succeeds, the decision will be set as schedule_map[bundles[pos]] - /// else will be set ClientID::Nil(). + /// else will be set NodeID::Nil(). std::shared_ptr preparing_bundle_locations_; /// Number of prepare requests that are returned. @@ -264,7 +265,7 @@ class LeaseStatusTracker { /// from that node. This is needed so that we can retry lease requests from the node /// until we receive a reply or the node is removed. /// TODO(sang): We don't currently handle retry. - absl::flat_hash_map> + absl::flat_hash_map> node_to_bundles_when_preparing_; /// Bundles to schedule. @@ -289,7 +290,7 @@ class BundleLocationIndex { /// /// \param node_id The id of node. /// \return True if succeed. False otherwise. 
- bool Erase(const ClientID &node_id); + bool Erase(const NodeID &node_id); /// Erase bundle locations associated with a given placement group id. /// @@ -309,19 +310,19 @@ class BundleLocationIndex { /// \param node_id Node id of this bundle locations. /// \return Bundle locations that are associated with a given node id. const absl::optional const> GetBundleLocationsOnNode( - const ClientID &node_id); + const NodeID &node_id); /// Update the index to contain new node information. Should be used only when new node /// is added to the cluster. /// /// \param alive_nodes map of alive nodes. void AddNodes( - const absl::flat_hash_map> &nodes); + const absl::flat_hash_map> &nodes); private: /// Map from node ID to the set of bundles. This is used to lookup bundles at each node /// when a node is dead. - absl::flat_hash_map> node_to_leased_bundles_; + absl::flat_hash_map> node_to_leased_bundles_; /// A map from placement group id to bundle locations. /// It is used to destroy bundles for the placement group. @@ -382,7 +383,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface { /// \param node_id ID of the dead node. /// \return The bundles belong to the dead node. absl::flat_hash_map> GetBundlesOnNode( - const ClientID &node_id) override; + const NodeID &node_id) override; protected: /// Send a bundle PREPARE request to a node. The PREPARE request will lock resources @@ -459,7 +460,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface { const GcsNodeManager &gcs_node_manager_; /// The cached node clients which are used to communicate with raylet to lease workers. - absl::flat_hash_map> + absl::flat_hash_map> remote_lease_clients_; /// Factory for producing new clients to request leases from remote nodes. 
diff --git a/src/ray/gcs/gcs_server/gcs_server.cc b/src/ray/gcs/gcs_server/gcs_server.cc index bc1254246..b0f8a9bbb 100644 --- a/src/ray/gcs/gcs_server/gcs_server.cc +++ b/src/ray/gcs/gcs_server/gcs_server.cc @@ -206,8 +206,8 @@ void GcsServer::InitGcsActorManager() { [this](std::shared_ptr node) { // All of the related placement groups and actors should be reconstructed when a // node is removed from the GCS. - gcs_placement_group_manager_->OnNodeDead(ClientID::FromBinary(node->node_id())); - gcs_actor_manager_->OnNodeDead(ClientID::FromBinary(node->node_id())); + gcs_placement_group_manager_->OnNodeDead(NodeID::FromBinary(node->node_id())); + gcs_actor_manager_->OnNodeDead(NodeID::FromBinary(node->node_id())); }); auto on_subscribe = [this](const std::string &id, const std::string &data) { @@ -215,7 +215,7 @@ void GcsServer::InitGcsActorManager() { worker_failure_data.ParseFromString(data); auto &worker_address = worker_failure_data.worker_address(); WorkerID worker_id = WorkerID::FromBinary(id); - ClientID node_id = ClientID::FromBinary(worker_address.raylet_id()); + NodeID node_id = NodeID::FromBinary(worker_address.raylet_id()); gcs_actor_manager_->OnWorkerDead(node_id, worker_id, worker_failure_data.intentional_disconnect()); }; diff --git a/src/ray/gcs/gcs_server/gcs_table_storage.cc b/src/ray/gcs/gcs_server/gcs_table_storage.cc index 0f4dfbb81..234a63855 100644 --- a/src/ray/gcs/gcs_server/gcs_table_storage.cc +++ b/src/ray/gcs/gcs_server/gcs_table_storage.cc @@ -127,10 +127,10 @@ Status GcsTableWithJobId::BatchDelete(const std::vector &keys, } template class GcsTable; -template class GcsTable; -template class GcsTable; -template class GcsTable; -template class GcsTable; +template class GcsTable; +template class GcsTable; +template class GcsTable; +template class GcsTable; template class GcsTable; template class GcsTable; template class GcsTable; diff --git a/src/ray/gcs/gcs_server/gcs_table_storage.h b/src/ray/gcs/gcs_server/gcs_table_storage.h index 
54df38858..eff54cc11 100644 --- a/src/ray/gcs/gcs_server/gcs_table_storage.h +++ b/src/ray/gcs/gcs_server/gcs_table_storage.h @@ -245,7 +245,7 @@ class GcsObjectTable : public GcsTableWithJobId { JobID GetJobIdFromKey(const ObjectID &key) override { return key.TaskId().JobId(); } }; -class GcsNodeTable : public GcsTable { +class GcsNodeTable : public GcsTable { public: explicit GcsNodeTable(std::shared_ptr &store_client) : GcsTable(store_client) { @@ -253,7 +253,7 @@ class GcsNodeTable : public GcsTable { } }; -class GcsNodeResourceTable : public GcsTable { +class GcsNodeResourceTable : public GcsTable { public: explicit GcsNodeResourceTable(std::shared_ptr &store_client) : GcsTable(store_client) { @@ -261,7 +261,7 @@ class GcsNodeResourceTable : public GcsTable { } }; -class GcsHeartbeatTable : public GcsTable { +class GcsHeartbeatTable : public GcsTable { public: explicit GcsHeartbeatTable(std::shared_ptr &store_client) : GcsTable(store_client) { @@ -277,7 +277,7 @@ class GcsPlacementGroupScheduleTable : public GcsTable { +class GcsHeartbeatBatchTable : public GcsTable { public: explicit GcsHeartbeatBatchTable(std::shared_ptr &store_client) : GcsTable(store_client) { diff --git a/src/ray/gcs/gcs_server/gcs_worker_manager.cc b/src/ray/gcs/gcs_server/gcs_worker_manager.cc index 3aa2898ef..e8d53d858 100644 --- a/src/ray/gcs/gcs_server/gcs_worker_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_worker_manager.cc @@ -22,7 +22,7 @@ void GcsWorkerManager::HandleReportWorkerFailure( rpc::SendReplyCallback send_reply_callback) { const rpc::Address worker_address = request.worker_failure().worker_address(); const auto worker_id = WorkerID::FromBinary(worker_address.worker_id()); - const auto node_id = ClientID::FromBinary(worker_address.raylet_id()); + const auto node_id = NodeID::FromBinary(worker_address.raylet_id()); std::stringstream log_stream; log_stream << "Reporting worker failure, worker id = " << worker_id << ", node id = " << node_id diff --git 
a/src/ray/gcs/gcs_server/stats_handler_impl.cc b/src/ray/gcs/gcs_server/stats_handler_impl.cc index 6adce330b..57135c830 100644 --- a/src/ray/gcs/gcs_server/stats_handler_impl.cc +++ b/src/ray/gcs/gcs_server/stats_handler_impl.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "ray/gcs/gcs_server/stats_handler_impl.h" + #include "ray/common/ray_config.h" namespace ray { @@ -21,7 +22,7 @@ namespace rpc { void DefaultStatsHandler::HandleAddProfileData(const AddProfileDataRequest &request, AddProfileDataReply *reply, SendReplyCallback send_reply_callback) { - ClientID node_id = ClientID::FromBinary(request.profile_data().component_id()); + NodeID node_id = NodeID::FromBinary(request.profile_data().component_id()); RAY_LOG(DEBUG) << "Adding profile data, component type = " << request.profile_data().component_type() << ", node id = " << node_id; auto profile_table_data = std::make_shared(); diff --git a/src/ray/gcs/gcs_server/task_info_handler_impl.cc b/src/ray/gcs/gcs_server/task_info_handler_impl.cc index 9595cb0c9..794b6e653 100644 --- a/src/ray/gcs/gcs_server/task_info_handler_impl.cc +++ b/src/ray/gcs/gcs_server/task_info_handler_impl.cc @@ -93,7 +93,7 @@ void DefaultTaskInfoHandler::HandleAddTaskLease(const AddTaskLeaseRequest &reque AddTaskLeaseReply *reply, SendReplyCallback send_reply_callback) { TaskID task_id = TaskID::FromBinary(request.task_lease_data().task_id()); - ClientID node_id = ClientID::FromBinary(request.task_lease_data().node_manager_id()); + NodeID node_id = NodeID::FromBinary(request.task_lease_data().node_manager_id()); RAY_LOG(DEBUG) << "Adding task lease, job id = " << task_id.JobId() << ", task id = " << task_id << ", node id = " << node_id; auto on_done = [this, task_id, node_id, request, reply, @@ -144,8 +144,7 @@ void DefaultTaskInfoHandler::HandleAttemptTaskReconstruction( const AttemptTaskReconstructionRequest &request, AttemptTaskReconstructionReply *reply, SendReplyCallback send_reply_callback) { TaskID task_id = 
TaskID::FromBinary(request.task_reconstruction().task_id()); - ClientID node_id = - ClientID::FromBinary(request.task_reconstruction().node_manager_id()); + NodeID node_id = NodeID::FromBinary(request.task_reconstruction().node_manager_id()); RAY_LOG(DEBUG) << "Reconstructing task, job id = " << task_id.JobId() << ", task id = " << task_id << ", reconstructions num = " << request.task_reconstruction().num_reconstructions() diff --git a/src/ray/gcs/gcs_server/test/gcs_actor_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_actor_manager_test.cc index b675303c2..c4d6fa7e5 100644 --- a/src/ray/gcs/gcs_server/test/gcs_actor_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_actor_manager_test.cc @@ -31,12 +31,11 @@ class MockActorScheduler : public gcs::GcsActorSchedulerInterface { void Schedule(std::shared_ptr actor) { actors.push_back(actor); } void Reschedule(std::shared_ptr actor) {} void ReleaseUnusedWorkers( - const std::unordered_map> &node_to_workers) {} + const std::unordered_map> &node_to_workers) {} - MOCK_METHOD1(CancelOnNode, std::vector(const ClientID &node_id)); - MOCK_METHOD2(CancelOnWorker, - ActorID(const ClientID &node_id, const WorkerID &worker_id)); - MOCK_METHOD2(CancelOnLeasing, void(const ClientID &node_id, const ActorID &actor_id)); + MOCK_METHOD1(CancelOnNode, std::vector(const NodeID &node_id)); + MOCK_METHOD2(CancelOnWorker, ActorID(const NodeID &node_id, const WorkerID &worker_id)); + MOCK_METHOD2(CancelOnLeasing, void(const NodeID &node_id, const ActorID &actor_id)); std::vector> actors; }; @@ -121,7 +120,7 @@ class GcsActorManagerTest : public ::testing::Test { rpc::Address RandomAddress() const { rpc::Address address; - auto node_id = ClientID::FromRandom(); + auto node_id = NodeID::FromRandom(); auto worker_id = WorkerID::FromRandom(); address.set_raylet_id(node_id.Binary()); address.set_worker_id(worker_id.Binary()); @@ -238,7 +237,7 @@ TEST_F(GcsActorManagerTest, TestWorkerFailure) { // Check that the actor is in state `ALIVE`. 
auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); auto worker_id = WorkerID::FromBinary(address.worker_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); @@ -281,7 +280,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) { // Check that the actor is in state `ALIVE`. auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); WaitActorCreated(actor->GetActorID()); @@ -289,7 +288,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) { // Killing another node does not affect this actor. EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_)); - gcs_actor_manager_->OnNodeDead(ClientID::FromRandom()); + gcs_actor_manager_->OnNodeDead(NodeID::FromRandom()); ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE); // Remove node and then check that the actor is dead. @@ -325,7 +324,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) { // Check that the actor is in state `ALIVE`. 
auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); WaitActorCreated(actor->GetActorID()); @@ -341,7 +340,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) { ASSERT_EQ(mock_actor_scheduler_->actors.size(), 1); mock_actor_scheduler_->actors.clear(); ASSERT_EQ(finished_actors.size(), 1); - auto node_id2 = ClientID::FromRandom(); + auto node_id2 = NodeID::FromRandom(); address.set_raylet_id(node_id2.Binary()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); @@ -352,7 +351,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) { // Killing another worker does not affect this actor. EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_)); - gcs_actor_manager_->OnNodeDead(ClientID::FromRandom()); + gcs_actor_manager_->OnNodeDead(NodeID::FromRandom()); ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE); // Remove worker and then check that the actor is dead. @@ -388,7 +387,7 @@ TEST_F(GcsActorManagerTest, TestActorRestartWhenOwnerDead) { // Check that the actor is in state `ALIVE`. auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); WaitActorCreated(actor->GetActorID()); @@ -532,7 +531,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionWorkerFailure) { // Check that the actor is in state `ALIVE`. 
auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); auto worker_id = WorkerID::FromBinary(address.worker_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); @@ -578,7 +577,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNodeFailure) { // Check that the actor is in state `ALIVE`. auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); WaitActorCreated(actor->GetActorID()); @@ -624,7 +623,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNotHappendWhenReconstructed) { // Check that the actor is in state `ALIVE`. auto address = RandomAddress(); - auto node_id = ClientID::FromBinary(address.raylet_id()); + auto node_id = NodeID::FromBinary(address.raylet_id()); auto worker_id = WorkerID::FromBinary(address.worker_id()); actor->UpdateAddress(address); gcs_actor_manager_->OnActorCreationSuccess(actor); @@ -699,7 +698,7 @@ TEST_F(GcsActorManagerTest, TestRaceConditionCancelLease) { // Check that the actor is in state `ALIVE`. 
rpc::Address address; - auto node_id = ClientID::FromRandom(); + auto node_id = NodeID::FromRandom(); auto worker_id = WorkerID::FromRandom(); address.set_raylet_id(node_id.Binary()); address.set_worker_id(worker_id.Binary()); @@ -742,7 +741,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeActorDependenciesResolved) { auto job_id = JobID::FromInt(1); auto registered_actor = RegisterActor(job_id); const auto &owner_address = registered_actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); auto worker_id = WorkerID::FromBinary(owner_address.worker_id()); gcs_actor_manager_->OnWorkerDead(node_id, worker_id); ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD); @@ -758,7 +757,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeDetachedActorDependenciesRes auto job_id = JobID::FromInt(1); auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true); const auto &owner_address = registered_actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); auto worker_id = WorkerID::FromBinary(owner_address.worker_id()); gcs_actor_manager_->OnWorkerDead(node_id, worker_id); ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD); @@ -774,7 +773,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeActorDependenciesResolved) { auto job_id = JobID::FromInt(1); auto registered_actor = RegisterActor(job_id); const auto &owner_address = registered_actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); gcs_actor_manager_->OnNodeDead(node_id); ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD); @@ -789,7 +788,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeDetachedActorDependenciesResol auto job_id = 
JobID::FromInt(1); auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true); const auto &owner_address = registered_actor->GetOwnerAddress(); - auto node_id = ClientID::FromBinary(owner_address.raylet_id()); + auto node_id = NodeID::FromBinary(owner_address.raylet_id()); gcs_actor_manager_->OnNodeDead(node_id); ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD); diff --git a/src/ray/gcs/gcs_server/test/gcs_actor_scheduler_test.cc b/src/ray/gcs/gcs_server/test/gcs_actor_scheduler_test.cc index 5d8f098bc..00460a8b2 100644 --- a/src/ray/gcs/gcs_server/test/gcs_actor_scheduler_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_actor_scheduler_test.cc @@ -84,7 +84,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleFailedWithZeroNode) { TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -103,7 +103,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) { WorkerID worker_id = WorkerID::FromRandom(); ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(), node->node_manager_port(), worker_id, - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -119,7 +119,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) { TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -138,7 +138,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) { // Mock a IOError reply, then the lease request will retry again. 
ASSERT_TRUE(raylet_client_->GrantWorkerLease( node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(), - node_id, ClientID::Nil(), Status::IOError(""))); + node_id, NodeID::Nil(), Status::IOError(""))); ASSERT_EQ(1, gcs_actor_scheduler_->num_retry_leasing_count_); ASSERT_EQ(2, raylet_client_->num_workers_requested); ASSERT_EQ(1, raylet_client_->callbacks.size()); @@ -148,7 +148,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) { WorkerID worker_id = WorkerID::FromRandom(); ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(), node->node_manager_port(), worker_id, - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -164,7 +164,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) { TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -183,7 +183,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) { WorkerID worker_id = WorkerID::FromRandom(); ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(), node->node_manager_port(), worker_id, - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_creating_count_); @@ -205,7 +205,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) { TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, 
gcs_node_manager_->GetAllAliveNodes().size()); @@ -232,7 +232,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) { // Grant a worker, which will influence nothing. ASSERT_TRUE(raylet_client_->GrantWorkerLease( node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(), - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(1, raylet_client_->num_workers_requested); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_); @@ -243,7 +243,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) { TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -265,7 +265,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) { // Grant a worker, which will influence nothing. ASSERT_TRUE(raylet_client_->GrantWorkerLease( node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(), - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(1, raylet_client_->num_workers_requested); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_); @@ -276,7 +276,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) { TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -294,7 +294,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) { // Grant a worker, then the actor creation request should be send to the worker. 
ASSERT_TRUE(raylet_client_->GrantWorkerLease( node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(), - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -318,7 +318,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) { TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) { auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -337,7 +337,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) { auto worker_id = WorkerID::FromRandom(); ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(), node->node_manager_port(), worker_id, - node_id, ClientID::Nil())); + node_id, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -357,7 +357,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) { TEST_F(GcsActorSchedulerTest, TestSpillback) { auto node1 = Mocker::GenNodeInfo(); - auto node_id_1 = ClientID::FromBinary(node1->node_id()); + auto node_id_1 = NodeID::FromBinary(node1->node_id()); gcs_node_manager_->AddNode(node1); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -374,12 +374,12 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) { // Add another node. auto node2 = Mocker::GenNodeInfo(); - auto node_id_2 = ClientID::FromBinary(node2->node_id()); + auto node_id_2 = NodeID::FromBinary(node2->node_id()); gcs_node_manager_->AddNode(node2); ASSERT_EQ(2, gcs_node_manager_->GetAllAliveNodes().size()); // Grant with an invalid spillback node, and schedule again. 
- auto invalid_node_id = ClientID::FromBinary(Mocker::GenNodeInfo()->node_id()); + auto invalid_node_id = NodeID::FromBinary(Mocker::GenNodeInfo()->node_id()); ASSERT_TRUE(raylet_client_->GrantWorkerLease( node2->node_manager_address(), node2->node_manager_port(), WorkerID::Nil(), node_id_1, invalid_node_id)); @@ -400,7 +400,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) { WorkerID worker_id = WorkerID::FromRandom(); ASSERT_TRUE(raylet_client_->GrantWorkerLease(node2->node_manager_address(), node2->node_manager_port(), worker_id, - node_id_2, ClientID::Nil())); + node_id_2, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -417,7 +417,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) { TEST_F(GcsActorSchedulerTest, TestReschedule) { auto node1 = Mocker::GenNodeInfo(); - auto node_id_1 = ClientID::FromBinary(node1->node_id()); + auto node_id_1 = NodeID::FromBinary(node1->node_id()); gcs_node_manager_->AddNode(node1); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); @@ -452,7 +452,7 @@ TEST_F(GcsActorSchedulerTest, TestReschedule) { // Grant a worker, then the actor creation request should be send to the worker. ASSERT_TRUE(raylet_client_->GrantWorkerLease(node1->node_manager_address(), node1->node_manager_port(), worker_id, - node_id_1, ClientID::Nil())); + node_id_1, NodeID::Nil())); ASSERT_EQ(0, raylet_client_->callbacks.size()); ASSERT_EQ(1, worker_client_->callbacks.size()); @@ -470,12 +470,12 @@ TEST_F(GcsActorSchedulerTest, TestReleaseUnusedWorkers) { // Add a node to the cluster. auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); gcs_node_manager_->AddNode(node); ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size()); // Send a `ReleaseUnusedWorkers` request to the node. 
- std::unordered_map> node_to_workers; + std::unordered_map> node_to_workers; node_to_workers[node_id].push_back({WorkerID::FromRandom()}); gcs_actor_scheduler_->ReleaseUnusedWorkers(node_to_workers); ASSERT_EQ(1, raylet_client_->num_release_unused_workers); diff --git a/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc index 78deece92..a01f83b5e 100644 --- a/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_node_manager_test.cc @@ -37,7 +37,7 @@ TEST_F(GcsNodeManagerTest, TestManagement) { gcs_table_storage_); // Test Add/Get/Remove functionality. auto node = Mocker::GenNodeInfo(); - auto node_id = ClientID::FromBinary(node->node_id()); + auto node_id = NodeID::FromBinary(node->node_id()); node_manager.AddNode(node); ASSERT_EQ(node, node_manager.GetNode(node_id)); @@ -67,7 +67,7 @@ TEST_F(GcsNodeManagerTest, TestListener) { auto &alive_nodes = node_manager.GetAllAliveNodes(); ASSERT_EQ(added_nodes.size(), alive_nodes.size()); for (const auto &node : added_nodes) { - ASSERT_EQ(1, alive_nodes.count(ClientID::FromBinary(node->node_id()))); + ASSERT_EQ(1, alive_nodes.count(NodeID::FromBinary(node->node_id()))); } // Test AddNodeRemovedListener. 
@@ -77,7 +77,7 @@ TEST_F(GcsNodeManagerTest, TestListener) { removed_nodes.emplace_back(std::move(node)); }); for (int i = 0; i < node_count; ++i) { - node_manager.RemoveNode(ClientID::FromBinary(added_nodes[i]->node_id())); + node_manager.RemoveNode(NodeID::FromBinary(added_nodes[i]->node_id())); } ASSERT_EQ(node_count, removed_nodes.size()); ASSERT_TRUE(node_manager.GetAllAliveNodes().empty()); @@ -91,7 +91,7 @@ TEST_F(GcsNodeManagerTest, TestGetClusterRealtimeResources) { gcs::GcsNodeManager node_manager(io_service, io_service, gcs_pub_sub_, gcs_table_storage_); - auto node_id = ClientID::FromRandom(); + auto node_id = NodeID::FromRandom(); rpc::HeartbeatTableData heartbeat; const std::string cpu_resource = "CPU"; (*heartbeat.mutable_resources_available())[cpu_resource] = 10; diff --git a/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc index 9fc2bd321..cd6f143fe 100644 --- a/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc @@ -28,24 +28,24 @@ class MockedGcsObjectManager : public gcs::GcsObjectManager { : gcs::GcsObjectManager(gcs_table_storage, gcs_pub_sub, gcs_node_manager) {} public: - void AddObjectsLocation(const ClientID &node_id, + void AddObjectsLocation(const NodeID &node_id, const absl::flat_hash_set &object_ids) { gcs::GcsObjectManager::AddObjectsLocation(node_id, object_ids); } - void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) { + void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) { gcs::GcsObjectManager::AddObjectLocationInCache(object_id, node_id); } - absl::flat_hash_set GetObjectLocations(const ObjectID &object_id) { + absl::flat_hash_set GetObjectLocations(const ObjectID &object_id) { return gcs::GcsObjectManager::GetObjectLocations(object_id); } - void OnNodeRemoved(const ClientID &node_id) { + void OnNodeRemoved(const NodeID &node_id) { 
gcs::GcsObjectManager::OnNodeRemoved(node_id); } - void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) { + void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) { gcs::GcsObjectManager::RemoveObjectLocationInCache(object_id, node_id); } }; @@ -67,12 +67,12 @@ class GcsObjectManagerTest : public ::testing::Test { object_ids_.emplace(object_id); } for (size_t i = 0; i < node_count_; ++i) { - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); node_ids_.emplace(node_id); } } - void CheckLocations(const absl::flat_hash_set &locations) { + void CheckLocations(const absl::flat_hash_set &locations) { ASSERT_EQ(locations.size(), node_ids_.size()); for (const auto &location : locations) { auto it = node_ids_.find(location); @@ -92,7 +92,7 @@ class GcsObjectManagerTest : public ::testing::Test { size_t object_count_{5}; size_t node_count_{10}; absl::flat_hash_set object_ids_; - absl::flat_hash_set node_ids_; + absl::flat_hash_set node_ids_; }; TEST_F(GcsObjectManagerTest, AddObjectsLocationAndGetLocationTest) { diff --git a/src/ray/gcs/gcs_server/test/gcs_placement_group_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_placement_group_manager_test.cc index 123c2875e..ca58ee16c 100644 --- a/src/ray/gcs/gcs_server/test/gcs_placement_group_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_placement_group_manager_test.cc @@ -40,7 +40,7 @@ class MockPlacementGroupScheduler : public gcs::GcsPlacementGroupSchedulerInterf MOCK_METHOD1(MarkScheduleCancelled, void(const PlacementGroupID &placement_group_id)); absl::flat_hash_map> GetBundlesOnNode( - const ClientID &node_id) override { + const NodeID &node_id) override { absl::flat_hash_map> bundles; bundles[group_on_dead_node_] = bundles_on_dead_node_; return bundles; @@ -306,8 +306,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) { ASSERT_EQ(finished_placement_group_count, 0); 
ASSERT_EQ(mock_placement_group_scheduler_->placement_groups_.size(), 1); auto placement_group = mock_placement_group_scheduler_->placement_groups_.back(); - placement_group->GetMutableBundle(0)->set_node_id(ClientID::FromRandom().Binary()); - placement_group->GetMutableBundle(1)->set_node_id(ClientID::FromRandom().Binary()); + placement_group->GetMutableBundle(0)->set_node_id(NodeID::FromRandom().Binary()); + placement_group->GetMutableBundle(1)->set_node_id(NodeID::FromRandom().Binary()); mock_placement_group_scheduler_->placement_groups_.pop_back(); // If a node dies, we will set the bundles above it to be unplaced and reschedule the @@ -316,7 +316,7 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) { mock_placement_group_scheduler_->group_on_dead_node_ = placement_group->GetPlacementGroupID(); mock_placement_group_scheduler_->bundles_on_dead_node_.push_back(0); - gcs_placement_group_manager_->OnNodeDead(ClientID::FromRandom()); + gcs_placement_group_manager_->OnNodeDead(NodeID::FromRandom()); // Trigger scheduling `RESCHEDULING` placement group. auto finished_group = std::make_shared( @@ -328,8 +328,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) { placement_group->GetPlacementGroupID()); const auto &bundles = mock_placement_group_scheduler_->placement_groups_[0]->GetBundles(); - EXPECT_TRUE(ClientID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil()); - EXPECT_FALSE(ClientID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil()); + EXPECT_TRUE(NodeID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil()); + EXPECT_FALSE(NodeID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil()); // If `RESCHEDULING` placement group fails to create, we will schedule it again first. 
placement_group = mock_placement_group_scheduler_->placement_groups_.back(); diff --git a/src/ray/gcs/gcs_server/test/gcs_placement_group_scheduler_test.cc b/src/ray/gcs/gcs_server/test/gcs_placement_group_scheduler_test.cc index 2104aedb2..8cd16ca04 100644 --- a/src/ray/gcs/gcs_server/test/gcs_placement_group_scheduler_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_placement_group_scheduler_test.cc @@ -63,7 +63,7 @@ class GcsPlacementGroupSchedulerTest : public ::testing::Test { gcs_node_manager_->AddNode(node); rpc::HeartbeatTableData heartbeat; (*heartbeat.mutable_resources_available())["CPU"] = cpu_num; - gcs_node_manager_->UpdateNodeRealtimeResources(ClientID::FromBinary(node->node_id()), + gcs_node_manager_->UpdateNodeRealtimeResources(NodeID::FromBinary(node->node_id()), heartbeat); } @@ -489,10 +489,10 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestRescheduleWhenNodeDead) { WaitPendingDone(success_placement_groups_, 1); auto bundles_on_node0 = - scheduler_->GetBundlesOnNode(ClientID::FromBinary(node0->node_id())); + scheduler_->GetBundlesOnNode(NodeID::FromBinary(node0->node_id())); ASSERT_EQ(1, bundles_on_node0.size()); auto bundles_on_node1 = - scheduler_->GetBundlesOnNode(ClientID::FromBinary(node1->node_id())); + scheduler_->GetBundlesOnNode(NodeID::FromBinary(node1->node_id())); ASSERT_EQ(1, bundles_on_node1.size()); // One node is dead, reschedule the placement group. auto bundle_on_dead_node = placement_group->GetMutableBundle(0); @@ -543,8 +543,8 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestStrictSpreadStrategyResourceCheck) { TEST_F(GcsPlacementGroupSchedulerTest, TestBundleLocationIndex) { gcs::BundleLocationIndex bundle_location_index; /// Generate data. 
- const auto node1 = ClientID::FromRandom(); - const auto node2 = ClientID::FromRandom(); + const auto node1 = NodeID::FromRandom(); + const auto node2 = NodeID::FromRandom(); rpc::CreatePlacementGroupRequest request_pg1 = Mocker::GenCreatePlacementGroupRequest("pg1"); const auto pg1_id = PlacementGroupID::FromBinary( @@ -641,7 +641,7 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestNodeDeadDuringCommitResources) { scheduler_->ScheduleUnplacedBundles(placement_group, failure_handler, success_handler); ASSERT_TRUE(raylet_clients_[0]->GrantPrepareBundleResources()); - gcs_node_manager_->RemoveNode(ClientID::FromBinary(node1->node_id())); + gcs_node_manager_->RemoveNode(NodeID::FromBinary(node1->node_id())); ASSERT_TRUE(raylet_clients_[1]->GrantPrepareBundleResources()); WaitPendingDone(failure_placement_groups_, 1); } diff --git a/src/ray/gcs/gcs_server/test/gcs_server_rpc_test.cc b/src/ray/gcs/gcs_server/test/gcs_server_rpc_test.cc index 78ab650f0..1faffd166 100644 --- a/src/ray/gcs/gcs_server/test/gcs_server_rpc_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_server_rpc_test.cc @@ -671,8 +671,8 @@ TEST_F(GcsServerTest, TestNodeInfo) { TEST_F(GcsServerTest, TestObjectInfo) { // Create object table data ObjectID object_id = ObjectID::FromRandom(); - ClientID node1_id = ClientID::FromRandom(); - ClientID node2_id = ClientID::FromRandom(); + NodeID node1_id = NodeID::FromRandom(); + NodeID node2_id = NodeID::FromRandom(); // Add object location rpc::AddObjectLocationRequest add_object_location_request; @@ -720,7 +720,7 @@ TEST_F(GcsServerTest, TestTaskInfo) { ASSERT_TRUE(!result.has_task()); // Add task lease - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); auto task_lease_data = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary()); rpc::AddTaskLeaseRequest add_task_lease_request; add_task_lease_request.mutable_task_lease_data()->CopyFrom(*task_lease_data); @@ -739,7 +739,7 @@ TEST_F(GcsServerTest, TestTaskInfo) { 
TEST_F(GcsServerTest, TestStats) { rpc::ProfileTableData profile_table_data; - profile_table_data.set_component_id(ClientID::FromRandom().Binary()); + profile_table_data.set_component_id(NodeID::FromRandom().Binary()); rpc::AddProfileDataRequest add_profile_data_request; add_profile_data_request.mutable_profile_data()->CopyFrom(profile_table_data); ASSERT_TRUE(AddProfileData(add_profile_data_request)); diff --git a/src/ray/gcs/gcs_server/test/gcs_server_test_util.h b/src/ray/gcs/gcs_server/test/gcs_server_test_util.h index 835993fb9..9ba2f5527 100644 --- a/src/ray/gcs/gcs_server/test/gcs_server_test_util.h +++ b/src/ray/gcs/gcs_server/test/gcs_server_test_util.h @@ -89,12 +89,12 @@ struct GcsServerMocker { } bool GrantWorkerLease() { - return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, ClientID::Nil()); + return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, NodeID::Nil()); } // Trigger reply to RequestWorkerLease. bool GrantWorkerLease(const std::string &address, int port, const WorkerID &worker_id, - const ClientID &raylet_id, const ClientID &retry_at_raylet_id, + const NodeID &raylet_id, const NodeID &retry_at_raylet_id, Status status = Status::OK()) { rpc::RequestWorkerLeaseReply reply; if (!retry_at_raylet_id.IsNil()) { @@ -150,7 +150,7 @@ struct GcsServerMocker { int num_workers_disconnected = 0; int num_leases_canceled = 0; int num_release_unused_workers = 0; - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); std::list> callbacks = {}; std::list> cancel_callbacks = {}; std::list> release_callbacks = {}; @@ -217,7 +217,7 @@ struct GcsServerMocker { int num_lease_requested = 0; int num_return_requested = 0; int num_commit_requested = 0; - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); std::list> lease_callbacks = {}; std::list> return_callbacks = {}; }; @@ -289,8 +289,8 @@ struct GcsServerMocker { Status UnregisterSelf() override { return Status::NotImplemented(""); 
} - const ClientID &GetSelfId() const override { - static ClientID node_id; + const NodeID &GetSelfId() const override { + static NodeID node_id; return node_id; } @@ -304,7 +304,7 @@ struct GcsServerMocker { return Status::NotImplemented(""); } - Status AsyncUnregister(const ClientID &node_id, + Status AsyncUnregister(const NodeID &node_id, const gcs::StatusCallback &callback) override { if (callback) { callback(Status::OK()); @@ -321,34 +321,34 @@ struct GcsServerMocker { } Status AsyncSubscribeToNodeChange( - const gcs::SubscribeCallback &subscribe, + const gcs::SubscribeCallback &subscribe, const gcs::StatusCallback &done) override { return Status::NotImplemented(""); } - boost::optional Get(const ClientID &node_id) const override { + boost::optional Get(const NodeID &node_id) const override { return boost::none; } - const std::unordered_map &GetAll() const override { - static std::unordered_map node_info_list; + const std::unordered_map &GetAll() const override { + static std::unordered_map node_info_list; return node_info_list; } - bool IsRemoved(const ClientID &node_id) const override { return false; } + bool IsRemoved(const NodeID &node_id) const override { return false; } Status AsyncGetResources( - const ClientID &node_id, + const NodeID &node_id, const gcs::OptionalItemCallback &callback) override { return Status::NotImplemented(""); } - Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources, + Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, const gcs::StatusCallback &callback) override { return Status::NotImplemented(""); } - Status AsyncDeleteResources(const ClientID &node_id, + Status AsyncDeleteResources(const NodeID &node_id, const std::vector &resource_names, const gcs::StatusCallback &callback) override { return Status::NotImplemented(""); @@ -366,7 +366,7 @@ struct GcsServerMocker { } Status AsyncSubscribeHeartbeat( - const gcs::SubscribeCallback &subscribe, + const 
gcs::SubscribeCallback &subscribe, const gcs::StatusCallback &done) override { return Status::NotImplemented(""); } diff --git a/src/ray/gcs/pb_util.h b/src/ray/gcs/pb_util.h index 2cb0ac535..3b2923724 100644 --- a/src/ray/gcs/pb_util.h +++ b/src/ray/gcs/pb_util.h @@ -85,7 +85,7 @@ inline std::shared_ptr CreateActorTableData( /// Helper function to produce worker failure data. inline std::shared_ptr CreateWorkerFailureData( - const ClientID &raylet_id, const WorkerID &worker_id, const std::string &address, + const NodeID &raylet_id, const WorkerID &worker_id, const std::string &address, int32_t port, int64_t timestamp = std::time(nullptr), bool intentional_disconnect = false) { auto worker_failure_info_ptr = std::make_shared(); @@ -104,7 +104,7 @@ inline std::shared_ptr CreateWorkerFailureData( /// \param is_add Whether the object is appeared on the node. /// \return The object location change created by this method. inline std::shared_ptr CreateObjectLocationChange( - const ClientID &node_id, bool is_add) { + const NodeID &node_id, bool is_add) { ray::rpc::ObjectTableData object_table_data; object_table_data.set_manager(node_id.Binary()); auto object_location_change = std::make_shared(); diff --git a/src/ray/gcs/redis_accessor.cc b/src/ray/gcs/redis_accessor.cc index b3cebaab1..0287af67b 100644 --- a/src/ray/gcs/redis_accessor.cc +++ b/src/ray/gcs/redis_accessor.cc @@ -139,8 +139,7 @@ Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll( const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - return log_based_actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, - done); + return log_based_actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); } Status RedisLogBasedActorInfoAccessor::AsyncSubscribe( @@ -316,7 +315,7 @@ Status RedisActorInfoAccessor::AsyncSubscribeAll( const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - return 
actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done); + return actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); } Status RedisActorInfoAccessor::AsyncSubscribe( @@ -361,7 +360,7 @@ Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr & Status RedisJobInfoAccessor::AsyncSubscribeAll( const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - return job_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done); + return job_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); } RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl) @@ -506,7 +505,7 @@ Status RedisObjectInfoAccessor::AsyncGetLocations( } Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id, - const ClientID &node_id, + const NodeID &node_id, const StatusCallback &callback) { std::function @@ -524,7 +523,7 @@ Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id, } Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id, - const ClientID &node_id, + const NodeID &node_id, const StatusCallback &callback) { std::function @@ -569,7 +568,7 @@ Status RedisNodeInfoAccessor::UnregisterSelf() { return client_table.Disconnect(); } -const ClientID &RedisNodeInfoAccessor::GetSelfId() const { +const NodeID &RedisNodeInfoAccessor::GetSelfId() const { ClientTable &client_table = client_impl_->client_table(); return client_table.GetLocalClientId(); } @@ -583,18 +582,18 @@ Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info, const StatusCallback &callback) { ClientTable::WriteCallback on_done = nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &id, + on_done = [callback](RedisGcsClient *client, const NodeID &id, const GcsNodeInfo &data) { callback(Status::OK()); }; } ClientTable &client_table = client_impl_->client_table(); return 
client_table.MarkConnected(node_info, on_done); } -Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id, +Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) { ClientTable::WriteCallback on_done = nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &id, + on_done = [callback](RedisGcsClient *client, const NodeID &id, const GcsNodeInfo &data) { callback(Status::OK()); }; } ClientTable &client_table = client_impl_->client_table(); @@ -602,8 +601,7 @@ Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id, } Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, - const StatusCallback &done) { + const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); ClientTable &client_table = client_impl_->client_table(); return client_table.SubscribeToNodeChange(subscribe, done); @@ -612,7 +610,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange( Status RedisNodeInfoAccessor::AsyncGetAll( const MultiItemCallback &callback) { RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const ClientID &id, + auto on_done = [callback](RedisGcsClient *client, const NodeID &id, const std::vector &data) { std::vector result; std::set node_ids; @@ -627,7 +625,7 @@ Status RedisNodeInfoAccessor::AsyncGetAll( return client_table.Lookup(on_done); } -boost::optional RedisNodeInfoAccessor::Get(const ClientID &node_id) const { +boost::optional RedisNodeInfoAccessor::Get(const NodeID &node_id) const { GcsNodeInfo node_info; ClientTable &client_table = client_impl_->client_table(); bool found = client_table.GetClient(node_id, &node_info); @@ -638,12 +636,12 @@ boost::optional RedisNodeInfoAccessor::Get(const ClientID &node_id) return optional_node; } -const std::unordered_map &RedisNodeInfoAccessor::GetAll() const { +const std::unordered_map 
&RedisNodeInfoAccessor::GetAll() const { ClientTable &client_table = client_impl_->client_table(); return client_table.GetAllClients(); } -bool RedisNodeInfoAccessor::IsRemoved(const ClientID &node_id) const { +bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const { ClientTable &client_table = client_impl_->client_table(); return client_table.IsRemoved(node_id); } @@ -651,11 +649,11 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat( const std::shared_ptr &data_ptr, const StatusCallback &callback) { HeartbeatTable::WriteCallback on_done = nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &node_id, + on_done = [callback](RedisGcsClient *client, const NodeID &node_id, const HeartbeatTableData &data) { callback(Status::OK()); }; } - ClientID node_id = ClientID::FromBinary(data_ptr->client_id()); + NodeID node_id = NodeID::FromBinary(data_ptr->client_id()); HeartbeatTable &heartbeat_table = client_impl_->heartbeat_table(); return heartbeat_table.Add(JobID::Nil(), node_id, data_ptr, on_done); } @@ -663,15 +661,14 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat( void RedisNodeInfoAccessor::AsyncReReportHeartbeat() {} Status RedisNodeInfoAccessor::AsyncSubscribeHeartbeat( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - auto on_subscribe = [subscribe](const ClientID &node_id, - const HeartbeatTableData &data) { + auto on_subscribe = [subscribe](const NodeID &node_id, const HeartbeatTableData &data) { subscribe(node_id, data); }; - return heartbeat_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done); + return heartbeat_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done); } Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat( @@ -679,30 +676,30 @@ Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat( const StatusCallback &callback) { HeartbeatBatchTable::WriteCallback on_done 
= nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &node_id, + on_done = [callback](RedisGcsClient *client, const NodeID &node_id, const HeartbeatBatchTableData &data) { callback(Status::OK()); }; } HeartbeatBatchTable &hb_batch_table = client_impl_->heartbeat_batch_table(); - return hb_batch_table.Add(JobID::Nil(), ClientID::Nil(), data_ptr, on_done); + return hb_batch_table.Add(JobID::Nil(), NodeID::Nil(), data_ptr, on_done); } Status RedisNodeInfoAccessor::AsyncSubscribeBatchHeartbeat( const ItemCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - auto on_subscribe = [subscribe](const ClientID &node_id, + auto on_subscribe = [subscribe](const NodeID &node_id, const HeartbeatBatchTableData &data) { subscribe(data); }; - return heartbeat_batch_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, + return heartbeat_batch_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done); } Status RedisNodeInfoAccessor::AsyncGetResources( - const ClientID &node_id, const OptionalItemCallback &callback) { + const NodeID &node_id, const OptionalItemCallback &callback) { RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const ClientID &id, + auto on_done = [callback](RedisGcsClient *client, const NodeID &id, const ResourceMap &data) { boost::optional result; if (!data.empty()) { @@ -715,12 +712,12 @@ Status RedisNodeInfoAccessor::AsyncGetResources( return resource_table.Lookup(JobID::Nil(), node_id, on_done); } -Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id, +Status RedisNodeInfoAccessor::AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) { - Hash::HashCallback on_done = nullptr; + Hash::HashCallback on_done = nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &node_id, + on_done = [callback](RedisGcsClient *client, 
const NodeID &node_id, const ResourceMap &resources) { callback(Status::OK()); }; } @@ -729,11 +726,11 @@ Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id, } Status RedisNodeInfoAccessor::AsyncDeleteResources( - const ClientID &node_id, const std::vector &resource_names, + const NodeID &node_id, const std::vector &resource_names, const StatusCallback &callback) { - Hash::HashRemoveCallback on_done = nullptr; + Hash::HashRemoveCallback on_done = nullptr; if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ClientID &node_id, + on_done = [callback](RedisGcsClient *client, const NodeID &node_id, const std::vector &resource_names) { callback(Status::OK()); }; @@ -746,7 +743,7 @@ Status RedisNodeInfoAccessor::AsyncDeleteResources( Status RedisNodeInfoAccessor::AsyncSubscribeToResources( const ItemCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - auto on_subscribe = [subscribe](const ClientID &id, + auto on_subscribe = [subscribe](const NodeID &id, const ResourceChangeNotification &result) { rpc::NodeResourceChange node_resource_change; node_resource_change.set_node_id(id.Binary()); @@ -762,7 +759,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToResources( } subscribe(node_resource_change); }; - return resource_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done); + return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done); } RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient *client_impl) {} @@ -795,7 +792,7 @@ Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures( const SubscribeCallback &subscribe, const StatusCallback &done) { RAY_CHECK(subscribe != nullptr); - return worker_failure_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done); + return worker_failure_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); } Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure( diff --git 
a/src/ray/gcs/redis_accessor.h b/src/ray/gcs/redis_accessor.h index 2dadee599..b269bfd48 100644 --- a/src/ray/gcs/redis_accessor.h +++ b/src/ray/gcs/redis_accessor.h @@ -103,13 +103,13 @@ class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor { protected: RedisGcsClient *client_impl_{nullptr}; - // Use a random ClientID for actor subscription. Because: - // If we use ClientID::Nil, GCS will still send all actors' updates to this GCS Client. + // Use a random NodeID for actor subscription. Because: + // If we use NodeID::Nil, GCS will still send all actors' updates to this GCS Client. // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local ClientID, so we use - // random ClientID instead. + // And because the new GCS Client will no longer hold the local NodeID, so we use + // random NodeID instead. // TODO(micafan): Remove this random id, once GCS becomes a service. - ClientID subscribe_id_{ClientID::FromRandom()}; + NodeID subscribe_id_{NodeID::FromRandom()}; private: typedef SubscriptionExecutor @@ -246,13 +246,13 @@ class RedisTaskInfoAccessor : public TaskInfoAccessor { private: RedisGcsClient *client_impl_{nullptr}; - // Use a random ClientID for task subscription. Because: - // If we use ClientID::Nil, GCS will still send all tasks' updates to this GCS Client. + // Use a random NodeID for task subscription. Because: + // If we use NodeID::Nil, GCS will still send all tasks' updates to this GCS Client. // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local ClientID, so we use - // random ClientID instead. + // And because the new GCS Client will no longer hold the local NodeID, so we use + // random NodeID instead. // TODO(micafan): Remove this random id, once GCS becomes a service. 
- ClientID subscribe_id_{ClientID::FromRandom()}; + NodeID subscribe_id_{NodeID::FromRandom()}; typedef SubscriptionExecutor TaskSubscriptionExecutor; @@ -280,10 +280,10 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor { return Status::NotImplemented("AsyncGetAll not implemented"); } - Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id, + Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) override; - Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id, + Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) override; Status AsyncSubscribeToLocations( @@ -298,13 +298,13 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor { private: RedisGcsClient *client_impl_{nullptr}; - // Use a random ClientID for object subscription. Because: - // If we use ClientID::Nil, GCS will still send all objects' updates to this GCS Client. + // Use a random NodeID for object subscription. Because: + // If we use NodeID::Nil, GCS will still send all objects' updates to this GCS Client. // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local ClientID, so we use - // random ClientID instead. + // And because the new GCS Client will no longer hold the local NodeID, so we use + // random NodeID instead. // TODO(micafan): Remove this random id, once GCS becomes a service. 
- ClientID subscribe_id_{ClientID::FromRandom()}; + NodeID subscribe_id_{NodeID::FromRandom()}; typedef SubscriptionExecutor ObjectSubscriptionExecutor; @@ -324,35 +324,34 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor { Status UnregisterSelf() override; - const ClientID &GetSelfId() const override; + const NodeID &GetSelfId() const override; const GcsNodeInfo &GetSelfInfo() const override; Status AsyncRegister(const GcsNodeInfo &node_info, const StatusCallback &callback) override; - Status AsyncUnregister(const ClientID &node_id, - const StatusCallback &callback) override; + Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override; Status AsyncGetAll(const MultiItemCallback &callback) override; Status AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) override; - boost::optional Get(const ClientID &node_id) const override; + boost::optional Get(const NodeID &node_id) const override; - const std::unordered_map &GetAll() const override; + const std::unordered_map &GetAll() const override; - bool IsRemoved(const ClientID &node_id) const override; + bool IsRemoved(const NodeID &node_id) const override; - Status AsyncGetResources(const ClientID &node_id, + Status AsyncGetResources(const NodeID &node_id, const OptionalItemCallback &callback) override; - Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources, + Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) override; - Status AsyncDeleteResources(const ClientID &node_id, + Status AsyncDeleteResources(const NodeID &node_id, const std::vector &resource_names, const StatusCallback &callback) override; @@ -365,7 +364,7 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor { void AsyncReReportHeartbeat() override; Status AsyncSubscribeHeartbeat( - const SubscribeCallback &subscribe, + const SubscribeCallback 
&subscribe, const StatusCallback &done) override; Status AsyncReportBatchHeartbeat( @@ -392,15 +391,15 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor { private: RedisGcsClient *client_impl_{nullptr}; - typedef SubscriptionExecutor + typedef SubscriptionExecutor DynamicResourceSubscriptionExecutor; DynamicResourceSubscriptionExecutor resource_sub_executor_; - typedef SubscriptionExecutor + typedef SubscriptionExecutor HeartbeatSubscriptionExecutor; HeartbeatSubscriptionExecutor heartbeat_sub_executor_; - typedef SubscriptionExecutor + typedef SubscriptionExecutor HeartbeatBatchSubscriptionExecutor; HeartbeatBatchSubscriptionExecutor heartbeat_batch_sub_executor_; }; diff --git a/src/ray/gcs/redis_context.cc b/src/ray/gcs/redis_context.cc index bb1230c06..dc61b12f0 100644 --- a/src/ray/gcs/redis_context.cc +++ b/src/ray/gcs/redis_context.cc @@ -381,7 +381,7 @@ Status RedisContext::RunArgvAsync(const std::vector &args, return status; } -Status RedisContext::SubscribeAsync(const ClientID &client_id, +Status RedisContext::SubscribeAsync(const NodeID &client_id, const TablePubsub pubsub_channel, const RedisCallback &redisCallback, int64_t *out_callback_index) { diff --git a/src/ray/gcs/redis_context.h b/src/ray/gcs/redis_context.h index c254ec408..36ce4bb72 100644 --- a/src/ray/gcs/redis_context.h +++ b/src/ray/gcs/redis_context.h @@ -236,7 +236,7 @@ class RedisContext { /// \param redisCallback The callback function that the notification calls. /// \param out_callback_index The output pointer to callback index. /// \return Status. - Status SubscribeAsync(const ClientID &client_id, const TablePubsub pubsub_channel, + Status SubscribeAsync(const NodeID &client_id, const TablePubsub pubsub_channel, const RedisCallback &redisCallback, int64_t *out_callback_index); /// Subscribes the client to the given pattern. 
diff --git a/src/ray/gcs/redis_gcs_client.cc b/src/ray/gcs/redis_gcs_client.cc index aa71d847a..26bf5556e 100644 --- a/src/ray/gcs/redis_gcs_client.cc +++ b/src/ray/gcs/redis_gcs_client.cc @@ -48,8 +48,8 @@ Status RedisGcsClient::Connect(boost::asio::io_service &io_service) { log_based_actor_table_.reset(new LogBasedActorTable({primary_context}, this)); actor_table_.reset(new ActorTable({primary_context}, this)); - // TODO(micafan) Modify ClientTable' Constructor(remove ClientID) in future. - // We will use NodeID instead of ClientID. + // TODO(micafan) Modify ClientTable' Constructor(remove NodeID) in future. + // We will use NodeID instead of NodeID. // For worker/driver, it might not have this field(NodeID). // For raylet, NodeID should be initialized in raylet layer(not here). client_table_.reset(new ClientTable({primary_context}, this)); diff --git a/src/ray/gcs/subscription_executor.cc b/src/ray/gcs/subscription_executor.cc index 2ba7f8094..26014169f 100644 --- a/src/ray/gcs/subscription_executor.cc +++ b/src/ray/gcs/subscription_executor.cc @@ -20,7 +20,7 @@ namespace gcs { template Status SubscriptionExecutor::AsyncSubscribeAll( - const ClientID &client_id, const SubscribeCallback &subscribe, + const NodeID &client_id, const SubscribeCallback &subscribe, const StatusCallback &done) { // TODO(micafan) Optimize the lock when necessary. // Consider avoiding locking in single-threaded processes. 
@@ -110,9 +110,9 @@ Status SubscriptionExecutor::AsyncSubscribeAll( template Status SubscriptionExecutor::AsyncSubscribe( - const ClientID &client_id, const ID &id, const SubscribeCallback &subscribe, + const NodeID &client_id, const ID &id, const SubscribeCallback &subscribe, const StatusCallback &done) { - RAY_CHECK(client_id != ClientID::Nil()); + RAY_CHECK(client_id != NodeID::Nil()); // NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts, // thus we need to call `RequestNotifications` in the Subscribe callback to ensure @@ -160,7 +160,7 @@ Status SubscriptionExecutor::AsyncSubscribe( template Status SubscriptionExecutor::AsyncUnsubscribe( - const ClientID &client_id, const ID &id, const StatusCallback &done) { + const NodeID &client_id, const ID &id, const StatusCallback &done) { SubscribeCallback subscribe = nullptr; { std::unique_lock lock(mutex_); @@ -205,11 +205,10 @@ template class SubscriptionExecutor; template class SubscriptionExecutor; template class SubscriptionExecutor, TaskLeaseTable>; -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor; +template class SubscriptionExecutor; +template class SubscriptionExecutor; template class SubscriptionExecutor; } // namespace gcs diff --git a/src/ray/gcs/subscription_executor.h b/src/ray/gcs/subscription_executor.h index 1a63029b0..a2a31046c 100644 --- a/src/ray/gcs/subscription_executor.h +++ b/src/ray/gcs/subscription_executor.h @@ -46,7 +46,7 @@ class SubscriptionExecutor { /// is registered or updated. /// \param done Callback that will be called when subscription is complete. /// \return Status - Status AsyncSubscribeAll(const ClientID &client_id, + Status AsyncSubscribeAll(const NodeID &client_id, const SubscribeCallback &subscribe, const StatusCallback &done); @@ -61,7 +61,7 @@ class SubscriptionExecutor { /// is registered or updated. /// \param done Callback that will be called when subscription is complete. 
/// \return Status - Status AsyncSubscribe(const ClientID &client_id, const ID &id, + Status AsyncSubscribe(const NodeID &client_id, const ID &id, const SubscribeCallback &subscribe, const StatusCallback &done); @@ -74,7 +74,7 @@ class SubscriptionExecutor { /// \param id The id of the element to be unsubscribed to. /// \param done Callback that will be called when cancel subscription is complete. /// \return Status - Status AsyncUnsubscribe(const ClientID &client_id, const ID &id, + Status AsyncUnsubscribe(const NodeID &client_id, const ID &id, const StatusCallback &done); private: diff --git a/src/ray/gcs/tables.cc b/src/ray/gcs/tables.cc index dd3c1e0d3..d05af86b7 100644 --- a/src/ray/gcs/tables.cc +++ b/src/ray/gcs/tables.cc @@ -137,7 +137,7 @@ Status Log::Lookup(const JobID &job_id, const ID &id, const Callback & } template -Status Log::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Log::Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done) { auto subscribe_wrapper = [subscribe](RedisGcsClient *client, const ID &id, @@ -150,7 +150,7 @@ Status Log::Subscribe(const JobID &job_id, const ClientID &client_id, } template -Status Log::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Log::Subscribe(const JobID &job_id, const NodeID &client_id, const NotificationCallback &subscribe, const SubscriptionCallback &done) { RAY_CHECK(subscribe_callback_index_ == -1) @@ -192,7 +192,7 @@ Status Log::Subscribe(const JobID &job_id, const ClientID &client_id, template Status Log::RequestNotifications(const JobID &job_id, const ID &id, - const ClientID &client_id, + const NodeID &client_id, const StatusCallback &done) { RAY_CHECK(subscribe_callback_index_ >= 0) << "Client requested notifications on a key before Subscribe completed"; @@ -214,7 +214,7 @@ Status Log::RequestNotifications(const JobID &job_id, const ID &id, template Status Log::CancelNotifications(const JobID &job_id, 
const ID &id, - const ClientID &client_id, + const NodeID &client_id, const StatusCallback &done) { RAY_CHECK(subscribe_callback_index_ >= 0) << "Client canceled notifications on a key before Subscribe completed"; @@ -315,7 +315,7 @@ Status Table::Lookup(const JobID &job_id, const ID &id, const Callback } template -Status Table::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Table::Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const FailureCallback &failure, const SubscriptionCallback &done) { @@ -336,7 +336,7 @@ Status Table::Subscribe(const JobID &job_id, const ClientID &client_id } template -Status Table::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Table::Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done) { return Subscribe(job_id, client_id, subscribe, /*failure*/ nullptr, done); @@ -379,7 +379,7 @@ Status Set::Remove(const JobID &job_id, const ID &id, } template -Status Set::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Set::Subscribe(const JobID &job_id, const NodeID &client_id, const NotificationCallback &subscribe, const SubscriptionCallback &done) { auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id, @@ -481,7 +481,7 @@ Status Hash::Lookup(const JobID &job_id, const ID &id, } template -Status Hash::Subscribe(const JobID &job_id, const ClientID &client_id, +Status Hash::Subscribe(const JobID &job_id, const NodeID &client_id, const HashNotificationCallback &subscribe, const SubscriptionCallback &done) { RAY_CHECK(subscribe_callback_index_ == -1) @@ -551,7 +551,7 @@ void ClientTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback) void ClientTable::HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info) { - ClientID node_id = ClientID::FromBinary(node_info.node_id()); + NodeID node_id = NodeID::FromBinary(node_info.node_id()); bool is_alive = 
(node_info.state() == GcsNodeInfo::ALIVE); // It's possible to get duplicate notifications from the client table, so // check whether this notification is new. @@ -598,14 +598,14 @@ void ClientTable::HandleNotification(RedisGcsClient *client, } } -const ClientID &ClientTable::GetLocalClientId() const { +const NodeID &ClientTable::GetLocalClientId() const { RAY_CHECK(!local_node_id_.IsNil()); return local_node_id_; } const GcsNodeInfo &ClientTable::GetLocalClient() const { return local_node_info_; } -bool ClientTable::IsRemoved(const ClientID &node_id) const { +bool ClientTable::IsRemoved(const NodeID &node_id) const { return removed_nodes_.count(node_id) == 1; } @@ -617,7 +617,7 @@ Status ClientTable::Connect(const GcsNodeInfo &local_node_info) { auto node_info_ptr = std::make_shared(local_node_info); Status status = SyncAppend(JobID::Nil(), client_log_key_, node_info_ptr); if (status.ok()) { - local_node_id_ = ClientID::FromBinary(local_node_info.node_id()); + local_node_id_ = NodeID::FromBinary(local_node_info.node_id()); local_node_info_ = local_node_info; } return status; @@ -642,7 +642,7 @@ ray::Status ClientTable::MarkConnected(const GcsNodeInfo &node_info, return Append(JobID::Nil(), client_log_key_, node_info_ptr, done); } -ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id, +ray::Status ClientTable::MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done) { auto node_info = std::make_shared(); node_info->set_node_id(dead_node_id.Binary()); @@ -651,8 +651,7 @@ ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id, } ray::Status ClientTable::SubscribeToNodeChange( - const SubscribeCallback &subscribe, - const StatusCallback &done) { + const SubscribeCallback &subscribe, const StatusCallback &done) { // Callback for a notification from the client table. 
auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key, const std::vector &notifications) { @@ -699,7 +698,7 @@ ray::Status ClientTable::SubscribeToNodeChange( return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done); } -bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) const { +bool ClientTable::GetClient(const NodeID &node_id, GcsNodeInfo *node_info) const { RAY_CHECK(!node_id.IsNil()); auto entry = node_cache_.find(node_id); auto found = (entry != node_cache_.end()); @@ -709,7 +708,7 @@ bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) con return found; } -const std::unordered_map &ClientTable::GetAllClients() const { +const std::unordered_map &ClientTable::GetAllClients() const { return node_cache_; } @@ -720,13 +719,13 @@ Status ClientTable::Lookup(const Callback &lookup) { std::string ClientTable::DebugString() const { std::stringstream result; - result << Log::DebugString(); + result << Log::DebugString(); result << ", cache size: " << node_cache_.size() << ", num removed: " << removed_nodes_.size(); return result.str(); } -Status TaskLeaseTable::Subscribe(const JobID &job_id, const ClientID &client_id, +Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done) { auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id, @@ -865,21 +864,21 @@ template class Table; template class Log; template class Log; template class Table; -template class Table; -template class Table; -template class Log; +template class Table; +template class Table; +template class Log; template class Log; template class Log; -template class Log; -template class Log; +template class Log; +template class Log; template class Log; template class Table; template class Table; template class Table; template class Table; -template class Log; -template class Hash; +template class Log; +template class Hash; } //
namespace gcs diff --git a/src/ray/gcs/tables.h b/src/ray/gcs/tables.h index 6faea1e5e..fc144c8fb 100644 --- a/src/ray/gcs/tables.h +++ b/src/ray/gcs/tables.h @@ -71,10 +71,10 @@ template class PubsubInterface { public: virtual Status RequestNotifications(const JobID &job_id, const ID &id, - const ClientID &client_id, + const NodeID &client_id, const StatusCallback &done) = 0; virtual Status CancelNotifications(const JobID &job_id, const ID &id, - const ClientID &client_id, + const NodeID &client_id, const StatusCallback &done) = 0; virtual ~PubsubInterface(){}; }; @@ -195,7 +195,7 @@ class Log : public LogInterface, virtual public PubsubInterface { /// \param done Callback that is called when subscription is complete and we /// are ready to receive messages. /// \return Status - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done); /// Request notifications about a key in this table. @@ -214,8 +214,8 @@ class Log : public LogInterface, virtual public PubsubInterface { /// notifications can be requested, a call to `Subscribe` to this /// table with the same `client_id` must complete successfully. /// \return Status - Status RequestNotifications(const JobID &job_id, const ID &id, - const ClientID &client_id, const StatusCallback &done); + Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &client_id, + const StatusCallback &done); /// Cancel notifications about a key in this table. /// @@ -224,7 +224,7 @@ class Log : public LogInterface, virtual public PubsubInterface { /// \param client_id The client who originally requested notifications. /// \param done Callback that is called when cancel notifications is complete. 
/// \return Status - Status CancelNotifications(const JobID &job_id, const ID &id, const ClientID &client_id, + Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &client_id, const StatusCallback &done); /// Subscribe to any modifications to the key. The caller may choose @@ -245,7 +245,7 @@ class Log : public LogInterface, virtual public PubsubInterface { /// \param done Callback that is called when subscription is complete and we /// are ready to receive messages. /// \return Status - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const NotificationCallback &subscribe, const SubscriptionCallback &done); @@ -380,7 +380,7 @@ class Table : private Log, /// \param done Callback that is called when subscription is complete and we /// are ready to receive messages. /// \return Status - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const FailureCallback &failure, const SubscriptionCallback &done); @@ -399,7 +399,7 @@ class Table : private Log, /// \param done Callback that is called when subscription is complete and we /// are ready to receive messages. /// \return Status - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done); void Delete(const JobID &job_id, const ID &id) { Log::Delete(job_id, id); } @@ -499,7 +499,7 @@ class Set : private Log, /// \param done Callback that is called when subscription is complete and we /// are ready to receive messages. 
/// \return Status - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const NotificationCallback &subscribe, const SubscriptionCallback &done); @@ -600,7 +600,7 @@ class HashInterface { /// \param done SubscriptionCallback that is called when subscription is complete and /// we are ready to receive messages. /// \return Status - virtual Status Subscribe(const JobID &job_id, const ClientID &client_id, + virtual Status Subscribe(const JobID &job_id, const NodeID &client_id, const HashNotificationCallback &subscribe, const SubscriptionCallback &done) = 0; @@ -628,7 +628,7 @@ class Hash : private Log, Status Update(const JobID &job_id, const ID &id, const DataMap &pairs, const HashCallback &done) override; - Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const HashNotificationCallback &subscribe, const SubscriptionCallback &done) override; @@ -656,7 +656,7 @@ class Hash : private Log, using Log::num_lookups_; }; -class DynamicResourceTable : public Hash { +class DynamicResourceTable : public Hash { public: DynamicResourceTable(const std::vector> &contexts, RedisGcsClient *client) @@ -680,7 +680,7 @@ class ObjectTable : public Set { virtual ~ObjectTable(){}; }; -class HeartbeatTable : public Table { +class HeartbeatTable : public Table { public: HeartbeatTable(const std::vector> &contexts, RedisGcsClient *client) @@ -691,7 +691,7 @@ class HeartbeatTable : public Table { virtual ~HeartbeatTable() {} }; -class HeartbeatBatchTable : public Table { +class HeartbeatBatchTable : public Table { public: HeartbeatBatchTable(const std::vector> &contexts, RedisGcsClient *client) @@ -807,7 +807,7 @@ class TaskLeaseTable : public Table { /// Implement this method for the subscription tools class SubscriptionExecutor. /// In this way TaskLeaseTable() can also reuse class SubscriptionExecutor. 
- Status Subscribe(const JobID &job_id, const ClientID &client_id, + Status Subscribe(const JobID &job_id, const NodeID &client_id, const Callback &subscribe, const SubscriptionCallback &done); }; @@ -882,8 +882,8 @@ class ProfileTable : public Log { /// alive. When a client disconnects, or if another client detects its failure, /// it should append an entry to the log indicating that it is dead. A client /// that is marked as dead should never again be marked as alive; if it needs -/// to reconnect, it must connect with a different ClientID. -class ClientTable : public Log { +/// to reconnect, it must connect with a different NodeID. +class ClientTable : public Log { public: ClientTable(const std::vector> &contexts, RedisGcsClient *client) @@ -920,10 +920,10 @@ class ClientTable : public Log { /// \param done Callback that is called once the node has been marked to /// disconnected. /// \return Status - ray::Status MarkDisconnected(const ClientID &dead_node_id, const WriteCallback &done); + ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done); ray::Status SubscribeToNodeChange( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done); /// Get a client's information from the cache. The cache only contains @@ -934,12 +934,12 @@ class ClientTable : public Log { /// we have the client in the cache. /// a nil client ID. /// \return Whether teh client is in the cache. - bool GetClient(const ClientID &client, GcsNodeInfo *node_info) const; + bool GetClient(const NodeID &client, GcsNodeInfo *node_info) const; /// Get the local client's ID. /// /// \return The local client's ID. - const ClientID &GetLocalClientId() const; + const NodeID &GetLocalClientId() const; /// Get the local client's information. /// @@ -950,12 +950,12 @@ class ClientTable : public Log { /// /// \param node_id The ID of the client to check. /// \return Whether the client with ID client_id is removed. 
- bool IsRemoved(const ClientID &node_id) const; + bool IsRemoved(const NodeID &node_id) const; /// Get the information of all clients. /// /// \return The client ID to client information map. - const std::unordered_map &GetAllClients() const; + const std::unordered_map &GetAllClients() const; /// Lookup the client data in the client table. /// @@ -972,11 +972,11 @@ class ClientTable : public Log { /// The key at which the log of client information is stored. This key must /// be kept the same across all instances of the ClientTable, so that all /// clients append and read from the same key. - ClientID client_log_key_; + NodeID client_log_key_; private: using NodeChangeCallback = - std::function; + std::function; /// Register a callback to call when a new node is added or a node is removed. /// @@ -989,20 +989,20 @@ class ClientTable : public Log { /// Whether this client has called Disconnect(). bool disconnected_{false}; /// This node's ID. It will be initialized when we call method `Connect(...)`. - ClientID local_node_id_; + NodeID local_node_id_; /// Information about this node. GcsNodeInfo local_node_info_; /// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and /// RequestNotification. /// The reason for not using `local_node_id_` is because it is only initialized /// for registered nodes. - ClientID subscribe_id_{ClientID::FromRandom()}; + NodeID subscribe_id_{NodeID::FromRandom()}; /// The callback to call when a new node is added or a node is removed. NodeChangeCallback node_change_callback_{nullptr}; /// A cache for information about all nodes. - std::unordered_map node_cache_; + std::unordered_map node_cache_; /// The set of removed nodes. 
- std::unordered_set removed_nodes_; + std::unordered_set removed_nodes_; }; } // namespace gcs diff --git a/src/ray/gcs/test/gcs_test_util.h b/src/ray/gcs/test/gcs_test_util.h index fea0df303..ca7f98934 100644 --- a/src/ray/gcs/test/gcs_test_util.h +++ b/src/ray/gcs/test/gcs_test_util.h @@ -51,7 +51,7 @@ struct Mocker { bool detached = false, const std::string name = "") { rpc::Address owner_address; - owner_address.set_raylet_id(ClientID::FromRandom().Binary()); + owner_address.set_raylet_id(NodeID::FromRandom().Binary()); owner_address.set_ip_address("1234"); owner_address.set_port(5678); owner_address.set_worker_id(WorkerID::FromRandom().Binary()); @@ -67,7 +67,7 @@ struct Mocker { bool detached = false, const std::string name = "") { rpc::Address owner_address; - owner_address.set_raylet_id(ClientID::FromRandom().Binary()); + owner_address.set_raylet_id(NodeID::FromRandom().Binary()); owner_address.set_ip_address("1234"); owner_address.set_port(5678); owner_address.set_worker_id(WorkerID::FromRandom().Binary()); @@ -109,7 +109,7 @@ struct Mocker { static std::shared_ptr GenNodeInfo( uint16_t port = 0, const std::string address = "127.0.0.1") { auto node = std::make_shared(); - node->set_node_id(ClientID::FromRandom().Binary()); + node->set_node_id(NodeID::FromRandom().Binary()); node->set_node_manager_port(port); node->set_node_manager_address(address); return node; @@ -158,7 +158,7 @@ struct Mocker { } static std::shared_ptr GenProfileTableData( - const ClientID &node_id) { + const NodeID &node_id) { auto profile_table_data = std::make_shared(); profile_table_data->set_component_id(node_id.Binary()); return profile_table_data; diff --git a/src/ray/gcs/test/redis_gcs_client_test.cc b/src/ray/gcs/test/redis_gcs_client_test.cc index fd5b43d74..bb48d2115 100644 --- a/src/ray/gcs/test/redis_gcs_client_test.cc +++ b/src/ray/gcs/test/redis_gcs_client_test.cc @@ -70,7 +70,7 @@ class TestGcs : public ::testing::Test { }; TestGcs *test; -ClientID local_client_id = 
ClientID::FromRandom(); +NodeID local_client_id = NodeID::FromRandom(); class TestGcsWithAsio : public TestGcs { public: @@ -666,7 +666,7 @@ class SetTestHelper { // subscribed, we will append to the key several times and check that we get // notified for each. RAY_CHECK_OK(client->object_table().Subscribe( - job_id, ClientID::Nil(), notification_callback, subscribe_callback)); + job_id, NodeID::Nil(), notification_callback, subscribe_callback)); // Run the event loop. The loop will only stop if the registered subscription // callback is called (or an assertion failure). @@ -1005,7 +1005,7 @@ class LogSubscribeTestHelper { // subscribed, we will append to the key several times and check that we get // notified for each. RAY_CHECK_OK(client->job_table().Subscribe( - job_id, ClientID::Nil(), notification_callback, subscribe_callback)); + job_id, NodeID::Nil(), notification_callback, subscribe_callback)); // Run the event loop. The loop will only stop if the registered subscription // callback is called (or an assertion failure). 
@@ -1187,16 +1187,16 @@ TEST_F(TestGcsWithAsio, TestSetSubscribeCancel) { class ClientTableTestHelper { public: static void ClientTableNotification(std::shared_ptr client, - const ClientID &client_id, const GcsNodeInfo &data, + const NodeID &client_id, const GcsNodeInfo &data, bool is_alive) { - ClientID added_id = local_client_id; + NodeID added_id = local_client_id; ASSERT_EQ(client_id, added_id); - ASSERT_EQ(ClientID::FromBinary(data.node_id()), added_id); + ASSERT_EQ(NodeID::FromBinary(data.node_id()), added_id); ASSERT_EQ(data.state() == GcsNodeInfo::ALIVE, is_alive); GcsNodeInfo cached_client; ASSERT_TRUE(client->client_table().GetClient(added_id, &cached_client)); - ASSERT_EQ(ClientID::FromBinary(cached_client.node_id()), added_id); + ASSERT_EQ(NodeID::FromBinary(cached_client.node_id()), added_id); ASSERT_EQ(cached_client.state() == GcsNodeInfo::ALIVE, is_alive); } @@ -1205,7 +1205,7 @@ class ClientTableTestHelper { // Subscribe to a node gets added and removed. The latter // event will stop the event loop. RAY_CHECK_OK(client->client_table().SubscribeToNodeChange( - [client](const ClientID &id, const GcsNodeInfo &data) { + [client](const NodeID &id, const GcsNodeInfo &data) { // TODO(micafan) RAY_LOG(INFO) << "Test alive=" << data.state() << " id=" << id; if (data.state() == GcsNodeInfo::ALIVE) { @@ -1231,7 +1231,7 @@ class ClientTableTestHelper { // Register callbacks for when a client gets added and removed. The latter // event will stop the event loop. RAY_CHECK_OK(client->client_table().SubscribeToNodeChange( - [client](const ClientID &id, const GcsNodeInfo &data) { + [client](const NodeID &id, const GcsNodeInfo &data) { if (data.state() == GcsNodeInfo::ALIVE) { ClientTableNotification(client, id, data, /*is_insertion=*/true); // Disconnect from the client table. We should receive a notification @@ -1260,7 +1260,7 @@ class ClientTableTestHelper { // Register callbacks for when a client gets added and removed. 
The latter // event will stop the event loop. RAY_CHECK_OK(client->client_table().SubscribeToNodeChange( - [client](const ClientID &id, const GcsNodeInfo &data) { + [client](const NodeID &id, const GcsNodeInfo &data) { if (data.state() == GcsNodeInfo::ALIVE) { ClientTableNotification(client, id, data, true); } else { @@ -1291,14 +1291,14 @@ class ClientTableTestHelper { // Connect to the client table to start receiving notifications. RAY_CHECK_OK(client->client_table().Connect(local_node_info)); // Mark a different client as dead. - ClientID dead_client_id = ClientID::FromRandom(); + NodeID dead_client_id = NodeID::FromRandom(); RAY_CHECK_OK(client->client_table().MarkDisconnected(dead_client_id, nullptr)); // Make sure we only get a notification for the removal of the client we // marked as dead. RAY_CHECK_OK(client->client_table().SubscribeToNodeChange( [dead_client_id](const UniqueID &id, const GcsNodeInfo &data) { if (data.state() == GcsNodeInfo::DEAD) { - ASSERT_EQ(ClientID::FromBinary(data.node_id()), dead_client_id); + ASSERT_EQ(NodeID::FromBinary(data.node_id()), dead_client_id); test->Stop(); } }, @@ -1332,7 +1332,7 @@ class HashTableTestHelper { static void TestHashTable(const JobID &job_id, std::shared_ptr client) { uint64_t expected_count = 14; - ClientID client_id = ClientID::FromRandom(); + NodeID client_id = NodeID::FromRandom(); // Prepare the first resource map: data_map1. DynamicResourceTable::DataMap data_map1; auto cpu_data = std::make_shared(); @@ -1370,7 +1370,7 @@ class HashTableTestHelper { }; auto notification_callback = [data_map1, data_map2, compare_test, expected_count]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const std::vector &result) { RAY_CHECK(result.size() == 1); const ResourceChangeNotification &notification = result.back(); @@ -1399,13 +1399,13 @@ class HashTableTestHelper { }; // Step 0: Subscribe the change of the hash table.
RAY_CHECK_OK(client->resource_table().Subscribe( - job_id, ClientID::Nil(), notification_callback, subscribe_callback)); + job_id, NodeID::Nil(), notification_callback, subscribe_callback)); RAY_CHECK_OK(client->resource_table().RequestNotifications(job_id, client_id, local_client_id, nullptr)); // Step 1: Add elements to the hash table. auto update_callback1 = [data_map1, compare_test]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { compare_test(data_map1, callback_data); test->IncrementNumCallbacks(); @@ -1413,7 +1413,7 @@ class HashTableTestHelper { RAY_CHECK_OK( client->resource_table().Update(job_id, client_id, data_map1, update_callback1)); auto lookup_callback1 = [data_map1, compare_test]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { compare_test(data_map1, callback_data); test->IncrementNumCallbacks(); @@ -1423,14 +1423,14 @@ class HashTableTestHelper { // Step 2: Decrease one element, increase one and add a new one. RAY_CHECK_OK(client->resource_table().Update(job_id, client_id, data_map2, nullptr)); auto lookup_callback2 = [data_map2, compare_test]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { compare_test(data_map2, callback_data); test->IncrementNumCallbacks(); }; RAY_CHECK_OK(client->resource_table().Lookup(job_id, client_id, lookup_callback2)); std::vector delete_keys({"GPU", "CUSTOM", "None-Existent"}); - auto remove_callback = [delete_keys](RedisGcsClient *client, const ClientID &id, + auto remove_callback = [delete_keys](RedisGcsClient *client, const NodeID &id, const std::vector &callback_data) { for (size_t i = 0; i < callback_data.size(); ++i) { // All deleting keys exist in this argument even if the key doesn't exist. 
@@ -1444,7 +1444,7 @@ class HashTableTestHelper { data_map3.erase("GPU"); data_map3.erase("CUSTOM"); auto lookup_callback3 = [data_map3, compare_test]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { compare_test(data_map3, callback_data); test->IncrementNumCallbacks(); @@ -1455,7 +1455,7 @@ class HashTableTestHelper { RAY_CHECK_OK( client->resource_table().Update(job_id, client_id, data_map1, update_callback1)); auto lookup_callback4 = [data_map1, compare_test]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { compare_test(data_map1, callback_data); test->IncrementNumCallbacks(); @@ -1466,7 +1466,7 @@ class HashTableTestHelper { RAY_CHECK_OK(client->resource_table().RemoveEntries( job_id, client_id, {"GPU", "CPU", "CUSTOM", "None-Existent"}, nullptr)); auto lookup_callback5 = [expected_count]( - RedisGcsClient *client, const ClientID &id, + RedisGcsClient *client, const NodeID &id, const DynamicResourceTable::DataMap &callback_data) { ASSERT_EQ(callback_data.size(), 0); test->IncrementNumCallbacks(); diff --git a/src/ray/gcs/test/redis_node_info_accessor_test.cc b/src/ray/gcs/test/redis_node_info_accessor_test.cc index e46e0d242..49b31b09f 100644 --- a/src/ray/gcs/test/redis_node_info_accessor_test.cc +++ b/src/ray/gcs/test/redis_node_info_accessor_test.cc @@ -23,12 +23,12 @@ namespace ray { namespace gcs { -class NodeDynamicResourceTest : public AccessorTestBase { +class NodeDynamicResourceTest : public AccessorTestBase { protected: typedef NodeInfoAccessor::ResourceMap ResourceMap; virtual void GenTestData() { for (size_t node_index = 0; node_index < node_number_; ++node_index) { - ClientID id = ClientID::FromRandom(); + NodeID id = NodeID::FromRandom(); ResourceMap resource_map; for (size_t rs_index = 0; rs_index < resource_type_number_; ++rs_index) { std::shared_ptr 
rs_data = @@ -44,7 +44,7 @@ class NodeDynamicResourceTest : public AccessorTestBase id_to_resource_map_; + std::unordered_map id_to_resource_map_; size_t node_number_{100}; size_t resource_type_number_{5}; @@ -59,7 +59,7 @@ TEST_F(NodeDynamicResourceTest, UpdateAndGet) { NodeInfoAccessor &node_accessor = gcs_client_->Nodes(); for (const auto &node_rs : id_to_resource_map_) { ++pending_count_; - const ClientID &id = node_rs.first; + const NodeID &id = node_rs.first; // Update Status status = node_accessor.AsyncUpdateResources( node_rs.first, node_rs.second, [this, &node_accessor, id](Status status) { @@ -95,7 +95,7 @@ TEST_F(NodeDynamicResourceTest, Delete) { for (const auto &node_rs : id_to_resource_map_) { ++pending_count_; - const ClientID &id = node_rs.first; + const NodeID &id = node_rs.first; // Delete Status status = node_accessor.AsyncDeleteResources( id, resource_to_delete_, [this, &node_accessor, id](Status status) { @@ -128,7 +128,7 @@ TEST_F(NodeDynamicResourceTest, Subscribe) { WaitPendingDone(wait_pending_timeout_); auto subscribe = [this](const rpc::NodeResourceChange &notification) { - auto id = ClientID::FromBinary(notification.node_id()); + auto id = NodeID::FromBinary(notification.node_id()); RAY_LOG(INFO) << "receive client id=" << id; auto it = id_to_resource_map_.find(id); ASSERT_TRUE(it != id_to_resource_map_.end()); diff --git a/src/ray/gcs/test/redis_object_info_accessor_test.cc b/src/ray/gcs/test/redis_object_info_accessor_test.cc index 604407ece..e69c9bc79 100644 --- a/src/ray/gcs/test/redis_object_info_accessor_test.cc +++ b/src/ray/gcs/test/redis_object_info_accessor_test.cc @@ -32,7 +32,7 @@ class RedisObjectInfoAccessorTest : public AccessorTestBase(); - ClientID node_id = ClientID::FromRandom(); + NodeID node_id = NodeID::FromRandom(); object->set_manager(node_id.Binary()); object_vec.emplace_back(std::move(object)); } @@ -55,7 +55,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) { for (const auto &elem : object_id_to_data_) {
for (const auto &item : elem.second) { ++pending_count_; - ClientID node_id = ClientID::FromBinary(item->manager()); + NodeID node_id = NodeID::FromBinary(item->manager()); RAY_CHECK_OK( object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) { RAY_CHECK_OK(status); @@ -113,7 +113,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) { ++pending_count_; ++sub_pending_count; const ObjectVector &object_vec = elem.second; - ClientID node_id = ClientID::FromBinary(object_vec[0]->manager()); + NodeID node_id = NodeID::FromBinary(object_vec[0]->manager()); RAY_CHECK_OK( object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) { RAY_CHECK_OK(status); diff --git a/src/ray/gcs/test/subscription_executor_test.cc b/src/ray/gcs/test/subscription_executor_test.cc index fdd4d5afc..44326bf97 100644 --- a/src/ray/gcs/test/subscription_executor_test.cc +++ b/src/ray/gcs/test/subscription_executor_test.cc @@ -103,12 +103,12 @@ class SubscriptionExecutorTest : public AccessorTestBaseAsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_); + actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_); WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_); ASSERT_TRUE(status.ok()); sub_pending_count_ = id_to_data_.size(); AsyncRegisterActorToGcs(); - status = actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_); + status = actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_); ASSERT_TRUE(status.IsInvalid()); WaitPendingDone(sub_pending_count_, wait_pending_timeout_); } @@ -117,13 +117,13 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneWithClientIDTest) { const auto &item = id_to_data_.begin(); ++do_sub_pending_count_; ++sub_pending_count_; - Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first, + Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first, subscribe_, sub_done_); 
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_); ASSERT_TRUE(status.ok()); AsyncRegisterActorToGcs(); WaitPendingDone(sub_pending_count_, wait_pending_timeout_); - status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first, + status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first, subscribe_, sub_done_); ASSERT_TRUE(status.IsInvalid()); } @@ -133,12 +133,12 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT ++do_sub_pending_count_; ++sub_pending_count_; AsyncRegisterActorToGcs(); - Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first, + Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first, subscribe_, sub_done_); WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_); ASSERT_TRUE(status.ok()); WaitPendingDone(sub_pending_count_, wait_pending_timeout_); - status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first, + status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first, subscribe_, sub_done_); ASSERT_TRUE(status.IsInvalid()); } @@ -146,11 +146,11 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) { ++do_sub_pending_count_; Status status = - actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_); + actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_); ASSERT_TRUE(status.ok()); WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_); for (const auto &item : id_to_data_) { - status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item.first, + status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item.first, subscribe_, sub_done_); ASSERT_FALSE(status.ok()); } @@ -160,7 +160,7 @@ TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) { } TEST_F(SubscriptionExecutorTest, 
UnsubscribeTest) { - ClientID client_id = ClientID::FromRandom(); + NodeID client_id = NodeID::FromRandom(); Status status; for (const auto &item : id_to_data_) { status = actor_sub_executor_->AsyncUnsubscribe(client_id, item.first, unsub_done_); diff --git a/src/ray/object_manager/object_directory.cc b/src/ray/object_manager/object_directory.cc index 973d457b4..403e05893 100644 --- a/src/ray/object_manager/object_directory.cc +++ b/src/ray/object_manager/object_directory.cc @@ -32,13 +32,13 @@ using ray::rpc::ObjectTableData; bool UpdateObjectLocations(bool is_added, const std::vector &location_updates, std::shared_ptr gcs_client, - std::unordered_set *node_ids) { + std::unordered_set *node_ids) { // location_updates contains the updates of locations of the object. // with GcsChangeMode, we can determine whether the update mode is // addition or deletion. bool isUpdated = false; for (const auto &object_table_data : location_updates) { - ClientID node_id = ClientID::FromBinary(object_table_data.manager()); + NodeID node_id = NodeID::FromBinary(object_table_data.manager()); if (is_added && 0 == node_ids->count(node_id)) { node_ids->insert(node_id); isUpdated = true; @@ -62,7 +62,7 @@ bool UpdateObjectLocations(bool is_added, } // namespace ray::Status ObjectDirectory::ReportObjectAdded( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) { RAY_LOG(DEBUG) << "Reporting object added to GCS " << object_id; ray::Status status = @@ -71,7 +71,7 @@ ray::Status ObjectDirectory::ReportObjectAdded( } ray::Status ObjectDirectory::ReportObjectRemoved( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) { RAY_LOG(DEBUG) << "Reporting object removed to GCS " << object_id; ray::Status status = @@ -83,7 +83,7 @@ void 
ObjectDirectory::LookupRemoteConnectionInfo( RemoteConnectionInfo &connection_info) const { auto node_info = gcs_client_->Nodes().Get(connection_info.client_id); if (node_info) { - ClientID result_node_id = ClientID::FromBinary(node_info->node_id()); + NodeID result_node_id = NodeID::FromBinary(node_info->node_id()); RAY_CHECK(result_node_id == connection_info.client_id); if (node_info->state() == GcsNodeInfo::ALIVE) { connection_info.ip = node_info->node_manager_address(); @@ -105,7 +105,7 @@ std::vector ObjectDirectory::LookupAllRemoteConnections() return remote_connections; } -void ObjectDirectory::HandleClientRemoved(const ClientID &client_id) { +void ObjectDirectory::HandleClientRemoved(const NodeID &client_id) { for (auto &listener : listeners_) { const ObjectID &object_id = listener.first; if (listener.second.current_object_locations.count(client_id) > 0) { @@ -224,7 +224,7 @@ ray::Status ObjectDirectory::LookupLocations(const ObjectID &object_id, RAY_CHECK(status.ok()) << "Failed to get object location from GCS: " << status.message(); // Build the set of current locations based on the entries in the log. - std::unordered_set node_ids; + std::unordered_set node_ids; UpdateObjectLocations(/*is_added*/ true, location_updates, gcs_client_, &node_ids); // It is safe to call the callback directly since this is already running diff --git a/src/ray/object_manager/object_directory.h b/src/ray/object_manager/object_directory.h index a186e766f..b399b66b9 100644 --- a/src/ray/object_manager/object_directory.h +++ b/src/ray/object_manager/object_directory.h @@ -29,13 +29,13 @@ namespace ray { /// Connection information for remote object managers. struct RemoteConnectionInfo { - RemoteConnectionInfo(const ClientID &id) : client_id(id) {} + RemoteConnectionInfo(const NodeID &id) : client_id(id) {} // Returns whether there is enough information to connect to the remote // object manager. 
bool Connected() const { return !ip.empty(); } - ClientID client_id; + NodeID client_id; std::string ip; uint16_t port; }; @@ -60,7 +60,7 @@ class ObjectDirectoryInterface { /// Callback for object location notifications. using OnLocationsFound = std::function &)>; + const std::unordered_set &)>; /// Lookup object locations. Callback may be invoked with empty list of client ids. /// @@ -76,9 +76,9 @@ class ObjectDirectoryInterface { /// location, and fires the subscribed callbacks for those objects. /// /// \param client_id The object manager client that was removed. - virtual void HandleClientRemoved(const ClientID &client_id) = 0; + virtual void HandleClientRemoved(const NodeID &client_id) = 0; - /// Subscribe to be notified of locations (ClientID) of the given object. + /// Subscribe to be notified of locations (NodeID) of the given object. /// The callback will be invoked with the complete list of known locations /// whenever the set of locations changes. The callback will also be fired if /// the list of known locations is empty. The callback provided to this @@ -113,7 +113,7 @@ class ObjectDirectoryInterface { /// \param object_info Additional information about the object. /// \return Status of whether this method succeeded. virtual ray::Status ReportObjectAdded( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) = 0; /// Report objects removed from this client's store to the object directory. @@ -123,7 +123,7 @@ class ObjectDirectoryInterface { /// \param object_info Additional information about the object. /// \return Status of whether this method succeeded. virtual ray::Status ReportObjectRemoved( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) = 0; /// Returns debug string for class. 
@@ -154,7 +154,7 @@ class ObjectDirectory : public ObjectDirectoryInterface { const rpc::Address &owner_address, const OnLocationsFound &callback) override; - void HandleClientRemoved(const ClientID &client_id) override; + void HandleClientRemoved(const NodeID &client_id) override; ray::Status SubscribeObjectLocations(const UniqueID &callback_id, const ObjectID &object_id, @@ -164,10 +164,10 @@ class ObjectDirectory : public ObjectDirectoryInterface { const ObjectID &object_id) override; ray::Status ReportObjectAdded( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) override; ray::Status ReportObjectRemoved( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) override; std::string DebugString() const override; @@ -181,7 +181,7 @@ class ObjectDirectory : public ObjectDirectoryInterface { /// The callback to invoke when object locations are found. std::unordered_map callbacks; /// The current set of known locations of this object. - std::unordered_set current_object_locations; + std::unordered_set current_object_locations; /// This flag will get set to true if received any notification of the object. /// It means current_object_locations is up-to-date with GCS. It /// should never go back to false once set to true. 
If this is true, and diff --git a/src/ray/object_manager/object_manager.cc b/src/ray/object_manager/object_manager.cc index fff7725a5..ee7d412bd 100644 --- a/src/ray/object_manager/object_manager.cc +++ b/src/ray/object_manager/object_manager.cc @@ -48,7 +48,7 @@ ObjectStoreRunner::~ObjectStoreRunner() { } } -ObjectManager::ObjectManager(asio::io_service &main_service, const ClientID &self_node_id, +ObjectManager::ObjectManager(asio::io_service &main_service, const NodeID &self_node_id, const ObjectManagerConfig &config, std::shared_ptr object_directory) : self_node_id_(self_node_id), @@ -184,7 +184,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id, // no ordering guarantee between notifications. return object_directory_->SubscribeObjectLocations( object_directory_pull_callback_id_, object_id, owner_address, - [this](const ObjectID &object_id, const std::unordered_set &client_ids) { + [this](const ObjectID &object_id, const std::unordered_set &client_ids) { // Exit if the Pull request has already been fulfilled or canceled. auto it = pull_requests_.find(object_id); if (it == pull_requests_.end()) { @@ -195,7 +195,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id, // we may end up sending a duplicate request to the same client as // before. it->second.client_locations = - std::vector(client_ids.begin(), client_ids.end()); + std::vector(client_ids.begin(), client_ids.end()); if (it->second.client_locations.empty()) { // The object locations are now empty, so we should wait for the next // notification about a new object location. Cancel the timer until @@ -244,7 +244,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) { // Generate a random index. 
std::uniform_int_distribution distribution(0, node_vector.size() - 1); int node_index = distribution(gen_); - ClientID node_id = node_vector[node_index]; + NodeID node_id = node_vector[node_index]; // If the object manager somehow ended up choosing itself, choose a different // object manager. if (node_id == self_node_id_) { @@ -309,7 +309,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) { }; void ObjectManager::SendPullRequest( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, std::shared_ptr rpc_client) { rpc::PullRequest pull_request; pull_request.set_object_id(object_id.Binary()); @@ -325,7 +325,7 @@ void ObjectManager::SendPullRequest( } void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id, - const ClientID &client_id) { + const NodeID &client_id) { RAY_LOG(WARNING) << "Invalid Push request ObjectID: " << object_id << " after waiting for " << config_.push_timeout_ms << " ms."; auto iter = unfulfilled_push_requests_.find(object_id); @@ -337,10 +337,9 @@ void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id, } } -void ObjectManager::HandleSendFinished(const ObjectID &object_id, - const ClientID &client_id, uint64_t chunk_index, - double start_time, double end_time, - ray::Status status) { +void ObjectManager::HandleSendFinished(const ObjectID &object_id, const NodeID &client_id, + uint64_t chunk_index, double start_time, + double end_time, ray::Status status) { RAY_LOG(DEBUG) << "HandleSendFinished on " << self_node_id_ << " to " << client_id << " of object " << object_id << " chunk " << chunk_index << ", status: " << status.ToString(); @@ -363,7 +362,7 @@ void ObjectManager::HandleSendFinished(const ObjectID &object_id, } void ObjectManager::HandleReceiveFinished(const ObjectID &object_id, - const ClientID &client_id, uint64_t chunk_index, + const NodeID &client_id, uint64_t chunk_index, double start_time, double end_time, ray::Status status) { if (!status.ok()) 
{ @@ -385,7 +384,7 @@ void ObjectManager::HandleReceiveFinished(const ObjectID &object_id, profile_events_.push_back(profile_event); } -void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) { +void ObjectManager::Push(const ObjectID &object_id, const NodeID &client_id) { RAY_LOG(DEBUG) << "Push on " << self_node_id_ << " to " << client_id << " of object " << object_id; if (local_objects_.count(object_id) == 0) { @@ -483,7 +482,7 @@ void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) { ray::Status ObjectManager::SendObjectChunk( const UniqueID &push_id, const ObjectID &object_id, const rpc::Address &owner_address, - const ClientID &client_id, uint64_t data_size, uint64_t metadata_size, + const NodeID &client_id, uint64_t data_size, uint64_t metadata_size, uint64_t chunk_index, std::shared_ptr rpc_client) { double start_time = absl::GetCurrentTimeNanos() / 1e9; rpc::PushRequest push_request; @@ -606,7 +605,7 @@ ray::Status ObjectManager::LookupRemainingWaitObjects(const UniqueID &wait_id) { RAY_RETURN_NOT_OK(object_directory_->LookupLocations( object_id, wait_state.owner_addresses[object_id], [this, wait_id](const ObjectID &lookup_object_id, - const std::unordered_set &client_ids) { + const std::unordered_set &client_ids) { auto &wait_state = active_wait_requests_.find(wait_id)->second; // Note that the object is guaranteed to be added to local_objects_ before // the notification is triggered. 
@@ -647,7 +646,7 @@ void ObjectManager::SubscribeRemainingWaitObjects(const UniqueID &wait_id) { RAY_CHECK_OK(object_directory_->SubscribeObjectLocations( wait_id, object_id, wait_state.owner_addresses[object_id], [this, wait_id](const ObjectID &subscribe_object_id, - const std::unordered_set &client_ids) { + const std::unordered_set &client_ids) { auto object_id_wait_state = active_wait_requests_.find(wait_id); if (object_id_wait_state == active_wait_requests_.end()) { // Depending on the timing of calls to the object directory, we @@ -738,7 +737,7 @@ void ObjectManager::WaitComplete(const UniqueID &wait_id) { void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply *reply, rpc::SendReplyCallback send_reply_callback) { ObjectID object_id = ObjectID::FromBinary(request.object_id()); - ClientID client_id = ClientID::FromBinary(request.client_id()); + NodeID client_id = NodeID::FromBinary(request.client_id()); // Serialize. uint64_t chunk_index = request.chunk_index(); @@ -756,7 +755,7 @@ void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply * send_reply_callback(status, nullptr, nullptr); } -ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id, +ray::Status ObjectManager::ReceiveObjectChunk(const NodeID &client_id, const ObjectID &object_id, const rpc::Address &owner_address, uint64_t data_size, uint64_t metadata_size, @@ -787,7 +786,7 @@ ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id, void ObjectManager::HandlePull(const rpc::PullRequest &request, rpc::PullReply *reply, rpc::SendReplyCallback send_reply_callback) { ObjectID object_id = ObjectID::FromBinary(request.object_id()); - ClientID client_id = ClientID::FromBinary(request.client_id()); + NodeID client_id = NodeID::FromBinary(request.client_id()); RAY_LOG(DEBUG) << "Received pull request from client " << client_id << " for object [" << object_id << "]."; @@ -855,7 +854,7 @@ void ObjectManager::SpreadFreeObjectsRequest( 
} std::shared_ptr ObjectManager::GetRpcClient( - const ClientID &client_id) { + const NodeID &client_id) { auto it = remote_object_manager_clients_.find(client_id); if (it == remote_object_manager_clients_.end()) { RemoteConnectionInfo connection_info(client_id); diff --git a/src/ray/object_manager/object_manager.h b/src/ray/object_manager/object_manager.h index 80396e840..f5dacb11a 100644 --- a/src/ray/object_manager/object_manager.h +++ b/src/ray/object_manager/object_manager.h @@ -26,10 +26,6 @@ #include #include -#include -#include -#include - #include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" #include "absl/time/clock.h" @@ -80,7 +76,7 @@ struct LocalObjectInfo { object_manager::protocol::ObjectInfoT object_info; /// A map from the ID of a remote object manager to the timestamp of when /// the object was last pushed to that object manager (if a push took place). - std::unordered_map recent_pushes; + std::unordered_map recent_pushes; }; class ObjectStoreRunner { @@ -146,9 +142,9 @@ class ObjectManager : public ObjectManagerInterface, /// \param chunk_index Chunk index of this object chunk, start with 0 /// \param rpc_client Rpc client used to send message to remote object manager ray::Status SendObjectChunk(const UniqueID &push_id, const ObjectID &object_id, - const rpc::Address &owner_address, - const ClientID &client_id, uint64_t data_size, - uint64_t metadata_size, uint64_t chunk_index, + const rpc::Address &owner_address, const NodeID &client_id, + uint64_t data_size, uint64_t metadata_size, + uint64_t chunk_index, std::shared_ptr rpc_client); /// Receive object chunk from remote object manager, small object may contain one chunk @@ -160,7 +156,7 @@ class ObjectManager : public ObjectManagerInterface, /// \param metadata_size Metadata size /// \param chunk_index Chunk index /// \param data Chunk data - ray::Status ReceiveObjectChunk(const ClientID &client_id, const ObjectID &object_id, + ray::Status ReceiveObjectChunk(const 
NodeID &client_id, const ObjectID &object_id, const rpc::Address &owner_address, uint64_t data_size, uint64_t metadata_size, uint64_t chunk_index, const std::string &data); @@ -169,13 +165,13 @@ class ObjectManager : public ObjectManagerInterface, /// /// \param object_id Object id /// \param client_id Remote server client id - void SendPullRequest(const ObjectID &object_id, const ClientID &client_id, + void SendPullRequest(const ObjectID &object_id, const NodeID &client_id, std::shared_ptr rpc_client); /// Get the rpc client according to the client ID /// /// \param client_id Remote client id, will send rpc request to it - std::shared_ptr GetRpcClient(const ClientID &client_id); + std::shared_ptr GetRpcClient(const NodeID &client_id); /// Get the port of the object manager rpc server. int GetServerPort() const { return object_manager_server_.GetPort(); } @@ -189,7 +185,7 @@ class ObjectManager : public ObjectManagerInterface, /// \param config ObjectManager configuration. /// \param object_directory An object implementing the object directory interface. explicit ObjectManager(boost::asio::io_service &main_service, - const ClientID &self_node_id, const ObjectManagerConfig &config, + const NodeID &self_node_id, const ObjectManagerConfig &config, std::shared_ptr object_directory); ~ObjectManager(); @@ -221,9 +217,9 @@ class ObjectManager : public ObjectManagerInterface, /// \param object_id The object's object id. /// \param client_id The remote node's client id. /// \return Void. - void Push(const ObjectID &object_id, const ClientID &client_id); + void Push(const ObjectID &object_id, const NodeID &client_id); - /// Pull an object from ClientID. + /// Pull an object from NodeID. /// /// \param object_id The object's object id. /// \return Status of whether the pull request successfully initiated. 
@@ -294,7 +290,7 @@ class ObjectManager : public ObjectManagerInterface, PullRequest() : retry_timer(nullptr), timer_set(false), client_locations() {} std::unique_ptr retry_timer; bool timer_set; - std::vector client_locations; + std::vector client_locations; }; struct WaitState { @@ -377,7 +373,7 @@ class ObjectManager : public ObjectManagerInterface, /// chunk. /// \param status The status of the send (e.g., did it succeed or fail). /// \return Void. - void HandleSendFinished(const ObjectID &object_id, const ClientID &client_id, + void HandleSendFinished(const ObjectID &object_id, const NodeID &client_id, uint64_t chunk_index, double start_time_us, double end_time_us, ray::Status status); @@ -393,14 +389,14 @@ class ObjectManager : public ObjectManagerInterface, /// chunk. /// \param status The status of the receive (e.g., did it succeed or fail). /// \return Void. - void HandleReceiveFinished(const ObjectID &object_id, const ClientID &client_id, + void HandleReceiveFinished(const ObjectID &object_id, const NodeID &client_id, uint64_t chunk_index, double start_time_us, double end_time_us, ray::Status status); /// Handle Push task timeout. - void HandlePushTaskTimeout(const ObjectID &object_id, const ClientID &client_id); + void HandlePushTaskTimeout(const ObjectID &object_id, const NodeID &client_id); - ClientID self_node_id_; + NodeID self_node_id_; const ObjectManagerConfig config_; std::shared_ptr object_directory_; // Object store runner. @@ -439,8 +435,7 @@ class ObjectManager : public ObjectManagerInterface, /// Maintains a map of push requests that have not been fulfilled due to an object not /// being local. Objects are removed from this map after push_timeout_ms have elapsed. 
std::unordered_map< - ObjectID, - std::unordered_map>> + ObjectID, std::unordered_map>> unfulfilled_push_requests_; /// The objects that this object manager is currently trying to fetch from @@ -468,7 +463,7 @@ class ObjectManager : public ObjectManagerInterface, rpc::ClientCallManager client_call_manager_; /// Client id - object manager gRPC client. - std::unordered_map> + std::unordered_map> remote_object_manager_clients_; /// Running sum of the amount of memory used in the object store. diff --git a/src/ray/object_manager/ownership_based_object_directory.cc b/src/ray/object_manager/ownership_based_object_directory.cc index 0a8d8c22d..c73bbe104 100644 --- a/src/ray/object_manager/ownership_based_object_directory.cc +++ b/src/ray/object_manager/ownership_based_object_directory.cc @@ -24,7 +24,7 @@ namespace { /// Filter out the removed clients from the object locations. void FilterRemovedClients(std::shared_ptr gcs_client, - std::unordered_set *node_ids) { + std::unordered_set *node_ids) { for (auto it = node_ids->begin(); it != node_ids->end();) { if (gcs_client->Nodes().IsRemoved(*it)) { it = node_ids->erase(it); @@ -64,7 +64,7 @@ std::shared_ptr OwnershipBasedObjectDirectory::GetClient( } ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) { WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id); rpc::Address owner_address = GetOwnerAddressFromObjectInfo(object_info); @@ -91,7 +91,7 @@ ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded( } ray::Status OwnershipBasedObjectDirectory::ReportObjectRemoved( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) { WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id); rpc::Address 
owner_address = GetOwnerAddressFromObjectInfo(object_info); @@ -126,9 +126,9 @@ void OwnershipBasedObjectDirectory::SubscriptionCallback( return; } - std::unordered_set client_ids; + std::unordered_set client_ids; for (auto const &client_id : reply.client_ids()) { - client_ids.emplace(ClientID::FromBinary(client_id)); + client_ids.emplace(NodeID::FromBinary(client_id)); } FilterRemovedClients(gcs_client_, &client_ids); if (client_ids != it->second.current_object_locations) { @@ -208,7 +208,7 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations( RAY_LOG(WARNING) << "Object " << object_id << " does not have owner. " << "LookupLocations returns an empty list of locations."; io_service_.post( - [callback, object_id]() { callback(object_id, std::unordered_set()); }); + [callback, object_id]() { callback(object_id, std::unordered_set()); }); return Status::OK(); } @@ -223,9 +223,9 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations( RAY_LOG(ERROR) << "Worker " << worker_id << " failed to get the location for " << object_id; } - std::unordered_set client_ids; + std::unordered_set client_ids; for (auto const &client_id : reply.client_ids()) { - client_ids.emplace(ClientID::FromBinary(client_id)); + client_ids.emplace(NodeID::FromBinary(client_id)); } FilterRemovedClients(gcs_client_, &client_ids); callback(object_id, client_ids); diff --git a/src/ray/object_manager/ownership_based_object_directory.h b/src/ray/object_manager/ownership_based_object_directory.h index 12b3f8de2..cdffa5be0 100644 --- a/src/ray/object_manager/ownership_based_object_directory.h +++ b/src/ray/object_manager/ownership_based_object_directory.h @@ -56,10 +56,10 @@ class OwnershipBasedObjectDirectory : public ObjectDirectory { const ObjectID &object_id) override; ray::Status ReportObjectAdded( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) override; ray::Status 
ReportObjectRemoved( - const ObjectID &object_id, const ClientID &client_id, + const ObjectID &object_id, const NodeID &client_id, const object_manager::protocol::ObjectInfoT &object_info) override; std::string DebugString() const override; diff --git a/src/ray/object_manager/plasma/common.h b/src/ray/object_manager/plasma/common.h index 75af867c8..7a7deebd6 100644 --- a/src/ray/object_manager/plasma/common.h +++ b/src/ray/object_manager/plasma/common.h @@ -34,7 +34,7 @@ namespace plasma { using ray::ObjectID; -using ray::ClientID; +using ray::NodeID; using ray::WorkerID; enum class ObjectLocation : int32_t { Local, Remote, Nonexistent }; @@ -81,7 +81,7 @@ struct ObjectTableEntry { /// Number of clients currently using this object. int ref_count; /// Owner's raylet ID. - ClientID owner_raylet_id; + NodeID owner_raylet_id; /// Owner's IP address. std::string owner_ip_address; /// Owner's port. diff --git a/src/ray/object_manager/plasma/protocol.cc b/src/ray/object_manager/plasma/protocol.cc index 85c61af78..c8f0d539e 100644 --- a/src/ray/object_manager/plasma/protocol.cc +++ b/src/ray/object_manager/plasma/protocol.cc @@ -206,7 +206,7 @@ Status SendCreateRequest(const std::shared_ptr &store_conn, ObjectID } Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id, - ClientID* owner_raylet_id, std::string* owner_ip_address, + NodeID* owner_raylet_id, std::string* owner_ip_address, int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full, int64_t* data_size, int64_t* metadata_size, int* device_num) { @@ -217,7 +217,7 @@ Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id, *data_size = message->data_size(); *metadata_size = message->metadata_size(); *object_id = ObjectID::FromBinary(message->object_id()->str()); - *owner_raylet_id = ClientID::FromBinary(message->owner_raylet_id()->str()); + *owner_raylet_id = NodeID::FromBinary(message->owner_raylet_id()->str()); *owner_ip_address = message->owner_ip_address()->str(); 
*owner_port = message->owner_port(); *owner_worker_id = WorkerID::FromBinary(message->owner_worker_id()->str()); diff --git a/src/ray/object_manager/plasma/protocol.h b/src/ray/object_manager/plasma/protocol.h index a321b0340..7fd7a0213 100644 --- a/src/ray/object_manager/plasma/protocol.h +++ b/src/ray/object_manager/plasma/protocol.h @@ -85,7 +85,7 @@ Status SendCreateRequest(const std::shared_ptr &store_conn, ObjectID int64_t data_size, int64_t metadata_size, int device_num); Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id, - ClientID* owner_raylet_id, std::string* owner_ip_address, + NodeID* owner_raylet_id, std::string* owner_ip_address, int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full, int64_t* data_size, int64_t* metadata_size, int* device_num); diff --git a/src/ray/object_manager/plasma/store.cc b/src/ray/object_manager/plasma/store.cc index 4982048be..4156bb1d0 100644 --- a/src/ray/object_manager/plasma/store.cc +++ b/src/ray/object_manager/plasma/store.cc @@ -227,7 +227,7 @@ Status PlasmaStore::FreeCudaMemory(int device_num, int64_t size, uint8_t* pointe // Create a new object buffer in the hash table. PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id, - const ClientID& owner_raylet_id, + const NodeID& owner_raylet_id, const std::string& owner_ip_address, int owner_port, const WorkerID& owner_worker_id, bool evict_if_full, int64_t data_size, @@ -884,7 +884,7 @@ Status PlasmaStore::ProcessMessage(const std::shared_ptr &client, // Process the different types of requests. 
switch (type) { case fb::MessageType::PlasmaCreateRequest: { - ClientID owner_raylet_id; + NodeID owner_raylet_id; std::string owner_ip_address; int owner_port; WorkerID owner_worker_id; diff --git a/src/ray/object_manager/plasma/store.h b/src/ray/object_manager/plasma/store.h index 29fce0e5e..29bf3cf04 100644 --- a/src/ray/object_manager/plasma/store.h +++ b/src/ray/object_manager/plasma/store.h @@ -94,7 +94,7 @@ class PlasmaStore { /// - PlasmaError::OutOfMemory, if the store is out of memory and /// cannot create the object. In this case, the client should not call /// plasma_release. - PlasmaError CreateObject(const ObjectID& object_id, const ClientID& owner_raylet_id, + PlasmaError CreateObject(const ObjectID& object_id, const NodeID& owner_raylet_id, const std::string& owner_ip_address, int owner_port, const WorkerID& owner_worker_id, bool evict_if_full, int64_t data_size, int64_t metadata_size, int device_num, diff --git a/src/ray/object_manager/test/object_manager_stress_test.cc b/src/ray/object_manager/test/object_manager_stress_test.cc index 60c7ab4b6..60ca1e967 100644 --- a/src/ray/object_manager/test/object_manager_stress_test.cc +++ b/src/ray/object_manager/test/object_manager_stress_test.cc @@ -50,7 +50,7 @@ class MockServer { MockServer(boost::asio::io_service &main_service, const ObjectManagerConfig &object_manager_config, std::shared_ptr gcs_client) - : node_id_(ClientID::FromRandom()), + : node_id_(NodeID::FromRandom()), config_(object_manager_config), gcs_client_(gcs_client), object_manager_(main_service, node_id_, object_manager_config, @@ -75,7 +75,7 @@ class MockServer { friend class StressTestObjectManager; - ClientID node_id_; + NodeID node_id_; ObjectManagerConfig config_; std::shared_ptr gcs_client_; ObjectManager object_manager_; @@ -199,8 +199,8 @@ class StressTestObjectManager : public TestObjectManagerBase { int num_connected_clients = 0; - ClientID node_id_1; - ClientID node_id_2; + NodeID node_id_1; + NodeID node_id_2; int64_t 
start_time; @@ -208,7 +208,7 @@ class StressTestObjectManager : public TestObjectManagerBase { node_id_1 = gcs_client_1->Nodes().GetSelfId(); node_id_2 = gcs_client_2->Nodes().GetSelfId(); RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange( - [this](const ClientID &node_id, const GcsNodeInfo &data) { + [this](const NodeID &node_id, const GcsNodeInfo &data) { if (node_id == node_id_1 || node_id == node_id_2) { num_connected_clients += 1; } @@ -218,7 +218,7 @@ class StressTestObjectManager : public TestObjectManagerBase { }, nullptr)); RAY_CHECK_OK(gcs_client_2->Nodes().AsyncSubscribeToNodeChange( - [this](const ClientID &node_id, const GcsNodeInfo &data) { + [this](const NodeID &node_id, const GcsNodeInfo &data) { if (node_id == node_id_1 || node_id == node_id_2) { num_connected_clients += 1; } @@ -310,8 +310,8 @@ class StressTestObjectManager : public TestObjectManagerBase { void TransferTestExecute(int num_trials, int64_t data_size, TransferPattern transfer_pattern) { - ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId(); - ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId(); + NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId(); + NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId(); ray::Status status = ray::Status::OK(); @@ -388,8 +388,8 @@ class StressTestObjectManager : public TestObjectManagerBase { RAY_LOG(DEBUG) << "\n" << "Server node ids:" << "\n"; - ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId(); - ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId(); + NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId(); + NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId(); RAY_LOG(DEBUG) << "Server 1: " << node_id_1 << "\n" << "Server 2: " << node_id_2; @@ -397,11 +397,11 @@ class StressTestObjectManager : public TestObjectManagerBase { << "All connected nodes:" << "\n"; auto data = gcs_client_1->Nodes().Get(node_id_1); - RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data->node_id()) << "\n" + RAY_LOG(DEBUG) << "NodeID=" << 
NodeID::FromBinary(data->node_id()) << "\n" << "NodeIp=" << data->node_manager_address() << "\n" << "NodePort=" << data->node_manager_port(); auto data2 = gcs_client_1->Nodes().Get(node_id_2); - RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data2->node_id()) << "\n" + RAY_LOG(DEBUG) << "NodeID=" << NodeID::FromBinary(data2->node_id()) << "\n" << "NodeIp=" << data2->node_manager_address() << "\n" << "NodePort=" << data2->node_manager_port(); } diff --git a/src/ray/object_manager/test/object_manager_test.cc b/src/ray/object_manager/test/object_manager_test.cc index 77324af04..80bdfd31e 100644 --- a/src/ray/object_manager/test/object_manager_test.cc +++ b/src/ray/object_manager/test/object_manager_test.cc @@ -46,7 +46,7 @@ class MockServer { MockServer(boost::asio::io_service &main_service, const ObjectManagerConfig &object_manager_config, std::shared_ptr gcs_client) - : node_id_(ClientID::FromRandom()), + : node_id_(NodeID::FromRandom()), config_(object_manager_config), gcs_client_(gcs_client), object_manager_(main_service, node_id_, object_manager_config, @@ -71,7 +71,7 @@ class MockServer { friend class TestObjectManager; - ClientID node_id_; + NodeID node_id_; ObjectManagerConfig config_; std::shared_ptr gcs_client_; ObjectManager object_manager_; @@ -180,8 +180,8 @@ class TestObjectManager : public TestObjectManagerBase { public: int current_wait_test = -1; int num_connected_clients = 0; - ClientID node_id_1; - ClientID node_id_2; + NodeID node_id_1; + NodeID node_id_2; ObjectID created_object_id1; ObjectID created_object_id2; @@ -192,7 +192,7 @@ class TestObjectManager : public TestObjectManagerBase { node_id_1 = gcs_client_1->Nodes().GetSelfId(); node_id_2 = gcs_client_2->Nodes().GetSelfId(); RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange( - [this](const ClientID &node_id, const GcsNodeInfo &data) { + [this](const NodeID &node_id, const GcsNodeInfo &data) { if (node_id == node_id_1 || node_id == node_id_2) { num_connected_clients += 1; } @@ 
-264,7 +264,7 @@ class TestObjectManager : public TestObjectManagerBase { sub_id, object_1, rpc::Address(), [this, sub_id, object_1, object_2]( const ray::ObjectID &object_id, - const std::unordered_set &clients) { + const std::unordered_set &clients) { if (!clients.empty()) { TestWaitWhileSubscribed(sub_id, object_1, object_2); } @@ -430,16 +430,16 @@ class TestObjectManager : public TestObjectManagerBase { << "Server node ids:" << "\n"; auto data = gcs_client_1->Nodes().Get(node_id_1); - RAY_LOG(DEBUG) << (ClientID::FromBinary(data->node_id()).IsNil()); - RAY_LOG(DEBUG) << "Server 1 NodeID=" << ClientID::FromBinary(data->node_id()); + RAY_LOG(DEBUG) << (NodeID::FromBinary(data->node_id()).IsNil()); + RAY_LOG(DEBUG) << "Server 1 NodeID=" << NodeID::FromBinary(data->node_id()); RAY_LOG(DEBUG) << "Server 1 NodeIp=" << data->node_manager_address(); RAY_LOG(DEBUG) << "Server 1 NodePort=" << data->node_manager_port(); - ASSERT_EQ(node_id_1, ClientID::FromBinary(data->node_id())); + ASSERT_EQ(node_id_1, NodeID::FromBinary(data->node_id())); auto data2 = gcs_client_1->Nodes().Get(node_id_2); - RAY_LOG(DEBUG) << "Server 2 NodeID=" << ClientID::FromBinary(data2->node_id()); + RAY_LOG(DEBUG) << "Server 2 NodeID=" << NodeID::FromBinary(data2->node_id()); RAY_LOG(DEBUG) << "Server 2 NodeIp=" << data2->node_manager_address(); RAY_LOG(DEBUG) << "Server 2 NodePort=" << data2->node_manager_port(); - ASSERT_EQ(node_id_2, ClientID::FromBinary(data2->node_id())); + ASSERT_EQ(node_id_2, NodeID::FromBinary(data2->node_id())); } }; diff --git a/src/ray/raylet/actor_registration.cc b/src/ray/raylet/actor_registration.cc index 4149dce50..5863cd91c 100644 --- a/src/ray/raylet/actor_registration.cc +++ b/src/ray/raylet/actor_registration.cc @@ -49,8 +49,8 @@ ActorRegistration::ActorRegistration(const ActorTableData &actor_table_data, } } -const ClientID ActorRegistration::GetNodeManagerId() const { - return ClientID::FromBinary(actor_table_data_.address().raylet_id()); +const NodeID 
ActorRegistration::GetNodeManagerId() const { + return NodeID::FromBinary(actor_table_data_.address().raylet_id()); } const ObjectID ActorRegistration::GetActorCreationDependency() const { diff --git a/src/ray/raylet/actor_registration.h b/src/ray/raylet/actor_registration.h index 34f88aaea..8a2895173 100644 --- a/src/ray/raylet/actor_registration.h +++ b/src/ray/raylet/actor_registration.h @@ -77,7 +77,7 @@ class ActorRegistration { /// /// \return The actor's node manager location. All tasks for the actor should /// be forwarded to this node. - const ClientID GetNodeManagerId() const; + const NodeID GetNodeManagerId() const; /// Get the object that represents the actor's initial state. This is the /// execution dependency returned by this actor's creation task. If diff --git a/src/ray/raylet/agent_manager.h b/src/ray/raylet/agent_manager.h index 3c79b31ba..e86902968 100644 --- a/src/ray/raylet/agent_manager.h +++ b/src/ray/raylet/agent_manager.h @@ -33,7 +33,7 @@ typedef std::function(std::function class AgentManager : public rpc::AgentManagerServiceHandler { public: struct Options { - const ClientID node_id; + const NodeID node_id; std::vector agent_commands; }; diff --git a/src/ray/raylet/format/node_manager.fbs b/src/ray/raylet/format/node_manager.fbs index 1f2cba6ce..d3faef5e9 100644 --- a/src/ray/raylet/format/node_manager.fbs +++ b/src/ray/raylet/format/node_manager.fbs @@ -154,7 +154,7 @@ table RegisterClientReply { success: bool; // The reason of registration failure. failure_reason: string; - // GCS ClientID of the local node manager. + // GCS NodeID of the local node manager. raylet_id: string; // Port that this worker should listen on. 
port: int; diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 099ff4b48..3cce1eb45 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -117,9 +117,8 @@ std::string WorkerOwnerString(std::shared_ptr &worker) { return buffer.str(); } -NodeManager::NodeManager(boost::asio::io_service &io_service, - const ClientID &self_node_id, const NodeManagerConfig &config, - ObjectManager &object_manager, +NodeManager::NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id, + const NodeManagerConfig &config, ObjectManager &object_manager, std::shared_ptr gcs_client, std::shared_ptr object_directory) : self_node_id_(self_node_id), @@ -194,7 +193,7 @@ NodeManager::NodeManager(boost::asio::io_service &io_service, return args_ready; }; - auto get_node_info_func = [this](const ClientID &node_id) { + auto get_node_info_func = [this](const NodeID &node_id) { return gcs_client_->Nodes().Get(node_id); }; cluster_task_manager_ = std::shared_ptr( @@ -231,7 +230,7 @@ ray::Status NodeManager::RegisterGcs() { RAY_RETURN_NOT_OK( gcs_client_->Actors().AsyncSubscribeAll(actor_notification_callback, nullptr)); - auto on_node_change = [this](const ClientID &node_id, const GcsNodeInfo &data) { + auto on_node_change = [this](const NodeID &node_id, const GcsNodeInfo &data) { if (data.state() == GcsNodeInfo::ALIVE) { NodeAdded(data); } else { @@ -247,7 +246,7 @@ ray::Status NodeManager::RegisterGcs() { // Subscribe to resource changes. 
const auto &resources_changed = [this](const rpc::NodeResourceChange &resource_notification) { - auto id = ClientID::FromBinary(resource_notification.node_id()); + auto id = NodeID::FromBinary(resource_notification.node_id()); if (resource_notification.updated_resources_size() != 0) { ResourceSet resource_set( MapFromProtobuf(resource_notification.updated_resources())); @@ -720,7 +719,7 @@ void NodeManager::GetObjectManagerProfileInfo() { } void NodeManager::NodeAdded(const GcsNodeInfo &node_info) { - const ClientID node_id = ClientID::FromBinary(node_info.node_id()); + const NodeID node_id = NodeID::FromBinary(node_info.node_id()); RAY_LOG(DEBUG) << "[NodeAdded] Received callback from client id " << node_id; if (1 == cluster_resource_map_.count(node_id)) { @@ -761,7 +760,7 @@ void NodeManager::NodeAdded(const GcsNodeInfo &node_info) { void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) { // TODO(swang): If we receive a notification for our own death, clean up and // exit immediately. 
- const ClientID node_id = ClientID::FromBinary(node_info.node_id()); + const NodeID node_id = NodeID::FromBinary(node_info.node_id()); RAY_LOG(DEBUG) << "[NodeRemoved] Received callback from client id " << node_id; RAY_CHECK(node_id != self_node_id_) @@ -810,7 +809,7 @@ void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) { void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) { const WorkerID worker_id = WorkerID::FromBinary(address.worker_id()); - const ClientID node_id = ClientID::FromBinary(address.raylet_id()); + const NodeID node_id = NodeID::FromBinary(address.raylet_id()); if (!worker_id.IsNil()) { RAY_LOG(DEBUG) << "Worker " << worker_id << " failed"; failed_workers_cache_.insert(worker_id); @@ -850,7 +849,7 @@ void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) { } } -void NodeManager::ResourceCreateUpdated(const ClientID &client_id, +void NodeManager::ResourceCreateUpdated(const NodeID &client_id, const ResourceSet &createUpdatedResources) { RAY_LOG(DEBUG) << "[ResourceCreateUpdated] received callback from client id " << client_id << " with created or updated resources: " @@ -882,7 +881,7 @@ void NodeManager::ResourceCreateUpdated(const ClientID &client_id, return; } -void NodeManager::ResourceDeleted(const ClientID &client_id, +void NodeManager::ResourceDeleted(const NodeID &client_id, const std::vector &resource_names) { if (RAY_LOG_ENABLED(DEBUG)) { std::ostringstream oss; @@ -929,7 +928,7 @@ void NodeManager::TryLocalInfeasibleTaskScheduling() { } } -void NodeManager::HeartbeatAdded(const ClientID &client_id, +void NodeManager::HeartbeatAdded(const NodeID &client_id, const HeartbeatTableData &heartbeat_data) { // Locate the client id in remote client table and update available resources based on // the received heartbeat information. 
@@ -1009,7 +1008,7 @@ void NodeManager::HeartbeatAdded(const ClientID &client_id, void NodeManager::HeartbeatBatchAdded(const HeartbeatBatchTableData &heartbeat_batch) { // Update load information provided by each heartbeat. for (const auto &heartbeat_data : heartbeat_batch.batch()) { - const ClientID &client_id = ClientID::FromBinary(heartbeat_data.client_id()); + const NodeID &client_id = NodeID::FromBinary(heartbeat_data.client_id()); if (client_id == self_node_id_) { // Skip heartbeats from self. continue; @@ -1790,7 +1789,7 @@ void NodeManager::HandleRequestWorkerLease(const rpc::RequestWorkerLeaseRequest leased_workers_[worker_id] = worker; }); task.OnSpillbackInstead( - [reply, task_id, send_reply_callback](const ClientID &spillback_to, + [reply, task_id, send_reply_callback](const NodeID &spillback_to, const std::string &address, int port) { RAY_LOG(DEBUG) << "Worker lease request SPILLBACK " << task_id; reply->mutable_retry_at_raylet_address()->set_ip_address(address); @@ -2012,7 +2011,7 @@ void NodeManager::ProcessSetResourceRequest( double const &capacity = message->capacity(); bool is_deletion = capacity <= 0; - ClientID node_id = from_flatbuf(*message->client_id()); + NodeID node_id = from_flatbuf(*message->client_id()); // If the python arg was null, set node_id to the local node id. if (node_id.IsNil()) { @@ -2044,7 +2043,7 @@ void NodeManager::ProcessSetResourceRequest( } bool NodeManager::PrepareBundle( - std::unordered_map &resource_map, + std::unordered_map &resource_map, const BundleSpecification &bundle_spec) { // TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent // once retry is implemented. 
If the resource map contains the local raylet, update load @@ -2080,7 +2079,7 @@ bool NodeManager::PrepareBundle( } void NodeManager::CommitBundle( - std::unordered_map &resource_map, + std::unordered_map &resource_map, const BundleSpecification &bundle_spec) { // TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent // once retry is implemented. @@ -2106,7 +2105,7 @@ void NodeManager::CommitBundle( } void NodeManager::ScheduleTasks( - std::unordered_map &resource_map) { + std::unordered_map &resource_map) { // If the resource map contains the local raylet, update load before calling policy. if (resource_map.count(self_node_id_) > 0) { resource_map[self_node_id_].SetLoadResources(local_queues_.GetTotalResourceLoad()); @@ -2118,17 +2117,17 @@ void NodeManager::ScheduleTasks( RAY_LOG(DEBUG) << "[NM ScheduleTasks] policy decision:"; for (const auto &task_client_pair : policy_decision) { TaskID task_id = task_client_pair.first; - ClientID node_id = task_client_pair.second; + NodeID node_id = task_client_pair.second; RAY_LOG(DEBUG) << task_id << " --> " << node_id; } #endif // Extract decision for this raylet. std::unordered_set local_task_ids; - // Iterate over (taskid, clientid) pairs, extract tasks assigned to the local node. + // Iterate over (taskid, nodeid) pairs, extract tasks assigned to the local node. 
for (const auto &task_client_pair : policy_decision) { const TaskID &task_id = task_client_pair.first; - const ClientID &node_id = task_client_pair.second; + const NodeID &node_id = task_client_pair.second; if (node_id == self_node_id_) { local_task_ids.insert(task_id); } else { @@ -2300,7 +2299,7 @@ void NodeManager::TreatTaskAsFailedIfLost(const Task &task) { object_id, spec.CallerAddress(), [this, task_marked_as_failed, task]( const ray::ObjectID &object_id, - const std::unordered_set &clients) { + const std::unordered_set &clients) { if (!*task_marked_as_failed) { // Only process the object locations if we haven't already marked the // task as failed. @@ -2604,7 +2603,7 @@ void NodeManager::AssignTask(const std::shared_ptr &worker, worker->SetPlacementGroupId(spec.PlacementGroupId()); const auto owner_worker_id = WorkerID::FromBinary(spec.CallerAddress().worker_id()); - const auto owner_node_id = ClientID::FromBinary(spec.CallerAddress().raylet_id()); + const auto owner_node_id = NodeID::FromBinary(spec.CallerAddress().raylet_id()); RAY_CHECK(!owner_worker_id.IsNil()); RAY_LOG(DEBUG) << "Worker lease request DISPATCH " << task_id << " to worker " << worker->WorkerId() << ", owner ID " << owner_worker_id; @@ -2935,8 +2934,7 @@ void NodeManager::HandleObjectMissing(const ObjectID &object_id) { } } -void NodeManager::ForwardTaskOrResubmit(const Task &task, - const ClientID &node_manager_id) { +void NodeManager::ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id) { // Attempt to forward the task. // TODO(sang): Modify method names. ForwardTask(task, node_manager_id, @@ -2957,7 +2955,7 @@ void NodeManager::ForwardTaskOrResubmit(const Task &task, } void NodeManager::ForwardTask( - const Task &task, const ClientID &node_id, + const Task &task, const NodeID &node_id, const std::function &on_error) { // This method spillbacks lease requests to other nodes. // TODO(sang): Modify method names. 
diff --git a/src/ray/raylet/node_manager.h b/src/ray/raylet/node_manager.h index c96db6d19..d1a720219 100644 --- a/src/ray/raylet/node_manager.h +++ b/src/ray/raylet/node_manager.h @@ -127,7 +127,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// /// \param resource_config The initial set of node resources. /// \param object_manager A reference to the local object manager. - NodeManager(boost::asio::io_service &io_service, const ClientID &self_node_id, + NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id, const NodeManagerConfig &config, ObjectManager &object_manager, std::shared_ptr gcs_client, std::shared_ptr object_directory_); @@ -191,14 +191,14 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// \param client_id ID of the node that created or updated resources. /// \param createUpdatedResources Created or updated resources. /// \return Void. - void ResourceCreateUpdated(const ClientID &client_id, + void ResourceCreateUpdated(const NodeID &client_id, const ResourceSet &createUpdatedResources); /// Handler for the deletion of a resource in the GCS /// \param client_id ID of the node that deleted resources. /// \param resource_names Names of deleted resources. /// \return Void. - void ResourceDeleted(const ClientID &client_id, + void ResourceDeleted(const NodeID &client_id, const std::vector &resource_names); /// Evaluates the local infeasible queue to check if any tasks can be scheduled. @@ -226,7 +226,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// \param id The ID of the node manager that sent the heartbeat. /// \param data The heartbeat data including load information. /// \return Void. - void HeartbeatAdded(const ClientID &id, const HeartbeatTableData &data); + void HeartbeatAdded(const NodeID &id, const HeartbeatTableData &data); /// Handler for a heartbeat batch notification from the GCS /// /// \param heartbeat_batch The batch of heartbeat data. 
@@ -308,7 +308,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// consider the local node manager and the node managers in the keys of the /// resource_map argument. /// \return Void. - void ScheduleTasks(std::unordered_map &resource_map); + void ScheduleTasks(std::unordered_map &resource_map); /// Make a placement decision for the resource_map and subtract original resources so /// that the node is ready to commit (create) placement group resources. @@ -319,7 +319,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// resource_map argument. /// \param bundle_spec Specification of bundle that will be prepared. /// \return True is resources were successfully prepared. False otherwise. - bool PrepareBundle(std::unordered_map &resource_map, + bool PrepareBundle(std::unordered_map &resource_map, const BundleSpecification &bundle_spec); /// Make a placement decision for the resource_map. @@ -329,7 +329,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// consider the local node manager and the node managers in the keys of the /// resource_map argument. /// \param bundle_spec Specification of bundle that will be prepared. - void CommitBundle(std::unordered_map &resource_map, + void CommitBundle(std::unordered_map &resource_map, const BundleSpecification &bundle_spec); /// Handle a task whose return value(s) must be reconstructed. @@ -346,7 +346,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// \param task The task in question. /// \param node_manager_id The ID of the remote node manager. /// \return Void. - void ForwardTaskOrResubmit(const Task &task, const ClientID &node_manager_id); + void ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id); /// Forward a task to another node to execute. The task is assumed to not be /// queued in local_queues_. 
/// @@ -354,7 +354,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// \param node_id The ID of the node to forward the task to. /// \param on_error Callback on run on non-ok status. void ForwardTask( - const Task &task, const ClientID &node_id, + const Task &task, const NodeID &node_id, const std::function &on_error); /// Dispatch locally scheduled tasks. This attempts the transition from "scheduled" to @@ -681,7 +681,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { bool IsActorCreationTask(const TaskID &task_id); /// ID of this node. - ClientID self_node_id_; + NodeID self_node_id_; boost::asio::io_service &io_service_; ObjectManager &object_manager_; /// A Plasma object store client. This is used for creating new objects in @@ -733,7 +733,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { const NodeManagerConfig initial_config_; /// The resources (and specific resource IDs) that are currently available. ResourceIdSet local_available_resources_; - std::unordered_map cluster_resource_map_; + std::unordered_map cluster_resource_map_; /// A pool of workers. WorkerPool worker_pool_; @@ -772,7 +772,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { rpc::ClientCallManager client_call_manager_; /// Map from node ids to clients of the remote node managers. - std::unordered_map> + std::unordered_map> remote_node_manager_clients_; /// Map of workers leased out to direct call clients. @@ -813,7 +813,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler { /// Cache for the WorkerTable in the GCS. absl::flat_hash_set failed_workers_cache_; /// Cache for the ClientTable in the GCS. 
- absl::flat_hash_set failed_nodes_cache_; + absl::flat_hash_set failed_nodes_cache_; /// Concurrency for the following map mutable absl::Mutex plasma_object_notification_lock_; diff --git a/src/ray/raylet/object_manager_integration_test.cc b/src/ray/raylet/object_manager_integration_test.cc index 2b6d0711f..5f950f46e 100644 --- a/src/ray/raylet/object_manager_integration_test.cc +++ b/src/ray/raylet/object_manager_integration_test.cc @@ -128,14 +128,14 @@ class TestObjectManagerIntegration : public TestObjectManagerBase { int num_connected_clients = 0; - ClientID node_id_1; - ClientID node_id_2; + NodeID node_id_1; + NodeID node_id_2; void WaitConnections() { node_id_1 = gcs_client_1->Nodes().GetSelfId(); node_id_2 = gcs_client_2->Nodes().GetSelfId(); gcs_client_1->Nodes().AsyncSubscribeToNodeChange( - [this](const ClientID &node_id, const rpc::GcsNodeInfo &data) { + [this](const NodeID &node_id, const rpc::GcsNodeInfo &data) { if (node_id == node_id_1 || node_id == node_id_2) { num_connected_clients += 1; } @@ -196,8 +196,8 @@ class TestObjectManagerIntegration : public TestObjectManagerBase { RAY_LOG(INFO) << "\n" << "Server client ids:" << "\n"; - ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId(); - ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId(); + NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId(); + NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId(); RAY_LOG(INFO) << "Server 1: " << node_id_1; RAY_LOG(INFO) << "Server 2: " << node_id_2; @@ -205,13 +205,13 @@ class TestObjectManagerIntegration : public TestObjectManagerBase { << "All connected clients:" << "\n"; auto data = gcs_client_2->Nodes().Get(node_id_1); - RAY_LOG(INFO) << (ClientID::FromBinary(data->node_id()).IsNil()); - RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data->node_id()); + RAY_LOG(INFO) << (NodeID::FromBinary(data->node_id()).IsNil()); + RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data->node_id()); RAY_LOG(INFO) << "ClientIp=" << data->node_manager_address(); 
RAY_LOG(INFO) << "ClientPort=" << data->node_manager_port(); rpc::GcsNodeInfo data2; gcs_client_1->Nodes().Get(node_id_2); - RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data2->node_id()); + RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data2->node_id()); RAY_LOG(INFO) << "ClientIp=" << data2->node_manager_address(); RAY_LOG(INFO) << "ClientPort=" << data2->node_manager_port(); } diff --git a/src/ray/raylet/raylet.cc b/src/ray/raylet/raylet.cc index 29c6d6891..b63513b53 100644 --- a/src/ray/raylet/raylet.cc +++ b/src/ray/raylet/raylet.cc @@ -60,7 +60,7 @@ Raylet::Raylet(boost::asio::io_service &main_service, const std::string &socket_ const NodeManagerConfig &node_manager_config, const ObjectManagerConfig &object_manager_config, std::shared_ptr gcs_client, int metrics_export_port) - : self_node_id_(ClientID::FromRandom()), + : self_node_id_(NodeID::FromRandom()), gcs_client_(gcs_client), object_directory_( RayConfig::instance().ownership_based_object_directory_enabled() diff --git a/src/ray/raylet/raylet.h b/src/ray/raylet/raylet.h index fe7a6632f..99b754fc8 100644 --- a/src/ray/raylet/raylet.h +++ b/src/ray/raylet/raylet.h @@ -77,7 +77,7 @@ class Raylet { friend class TestObjectManagerIntegration; /// ID of this node. - ClientID self_node_id_; + NodeID self_node_id_; /// Information of this node. 
GcsNodeInfo self_node_info_; diff --git a/src/ray/raylet/reconstruction_policy.cc b/src/ray/raylet/reconstruction_policy.cc index 3e94d12b1..98b49dca4 100644 --- a/src/ray/raylet/reconstruction_policy.cc +++ b/src/ray/raylet/reconstruction_policy.cc @@ -23,7 +23,7 @@ namespace raylet { ReconstructionPolicy::ReconstructionPolicy( boost::asio::io_service &io_service, std::function reconstruction_handler, - int64_t initial_reconstruction_timeout_ms, const ClientID &client_id, + int64_t initial_reconstruction_timeout_ms, const NodeID &client_id, std::shared_ptr gcs_client, std::shared_ptr object_directory) : io_service_(io_service), @@ -88,7 +88,7 @@ void ReconstructionPolicy::OnTaskLeaseNotification( return; } - const ClientID node_manager_id = ClientID::FromBinary(task_lease->node_manager_id()); + const NodeID node_manager_id = NodeID::FromBinary(task_lease->node_manager_id()); if (gcs_client_->Nodes().IsRemoved(node_manager_id)) { // The node manager that added the task lease is already removed. The // lease is considered inactive. @@ -179,7 +179,7 @@ void ReconstructionPolicy::HandleTaskLeaseExpired(const TaskID &task_id) { created_object_id, it->second.owner_addresses[created_object_id], [this, task_id, reconstruction_attempt]( const ray::ObjectID &object_id, - const std::unordered_set &clients) { + const std::unordered_set &clients) { if (clients.empty()) { // The required object no longer exists on any live nodes. Attempt // reconstruction. 
diff --git a/src/ray/raylet/reconstruction_policy.h b/src/ray/raylet/reconstruction_policy.h index 59286f06c..ec3b66252 100644 --- a/src/ray/raylet/reconstruction_policy.h +++ b/src/ray/raylet/reconstruction_policy.h @@ -54,7 +54,7 @@ class ReconstructionPolicy : public ReconstructionPolicyInterface { ReconstructionPolicy( boost::asio::io_service &io_service, std::function reconstruction_handler, - int64_t initial_reconstruction_timeout_ms, const ClientID &client_id, + int64_t initial_reconstruction_timeout_ms, const NodeID &client_id, std::shared_ptr gcs_client, std::shared_ptr object_directory); @@ -152,7 +152,7 @@ class ReconstructionPolicy : public ReconstructionPolicyInterface { /// received. Otherwise, reconstruction will be triggered. const int64_t initial_reconstruction_timeout_ms_; /// The client ID to use when requesting notifications from the GCS. - const ClientID client_id_; + const NodeID client_id_; /// A client connection to the GCS. std::shared_ptr gcs_client_; /// The object directory used to lookup object locations. 
diff --git a/src/ray/raylet/reconstruction_policy_test.cc b/src/ray/raylet/reconstruction_policy_test.cc index e56fa52ce..bb4168466 100644 --- a/src/ray/raylet/reconstruction_policy_test.cc +++ b/src/ray/raylet/reconstruction_policy_test.cc @@ -56,7 +56,7 @@ class MockObjectDirectory : public ObjectDirectoryInterface { const ObjectID object_id = callback.first; auto it = locations_.find(object_id); if (it == locations_.end()) { - callback.second(object_id, std::unordered_set()); + callback.second(object_id, std::unordered_set()); } else { callback.second(object_id, it->second); } @@ -65,11 +65,11 @@ class MockObjectDirectory : public ObjectDirectoryInterface { } void SetObjectLocations(const ObjectID &object_id, - const std::unordered_set &locations) { + const std::unordered_set &locations) { locations_[object_id] = locations; } - void HandleClientRemoved(const ClientID &client_id) override { + void HandleClientRemoved(const NodeID &client_id) override { for (auto &locations : locations_) { locations.second.erase(client_id); } @@ -77,7 +77,7 @@ class MockObjectDirectory : public ObjectDirectoryInterface { std::string DebugString() const override { return ""; } - MOCK_METHOD0(GetLocalClientID, ray::ClientID()); + MOCK_METHOD0(GetLocalClientID, ray::NodeID()); MOCK_CONST_METHOD1(LookupRemoteConnectionInfo, void(RemoteConnectionInfo &)); MOCK_CONST_METHOD0(LookupAllRemoteConnections, std::vector()); MOCK_METHOD4(SubscribeObjectLocations, @@ -86,15 +86,15 @@ class MockObjectDirectory : public ObjectDirectoryInterface { MOCK_METHOD2(UnsubscribeObjectLocations, ray::Status(const ray::UniqueID &, const ObjectID &)); MOCK_METHOD3(ReportObjectAdded, - ray::Status(const ObjectID &, const ClientID &, + ray::Status(const ObjectID &, const NodeID &, const object_manager::protocol::ObjectInfoT &)); MOCK_METHOD3(ReportObjectRemoved, - ray::Status(const ObjectID &, const ClientID &, + ray::Status(const ObjectID &, const NodeID &, const object_manager::protocol::ObjectInfoT &)); 
private: std::vector> callbacks_; - std::unordered_map> locations_; + std::unordered_map> locations_; }; class MockNodeInfoAccessor : public gcs::RedisNodeInfoAccessor { @@ -102,7 +102,7 @@ class MockNodeInfoAccessor : public gcs::RedisNodeInfoAccessor { MockNodeInfoAccessor(gcs::RedisGcsClient *client) : gcs::RedisNodeInfoAccessor(client) {} - bool IsRemoved(const ClientID &node_id) const override { return false; } + bool IsRemoved(const NodeID &node_id) const override { return false; } }; class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor { @@ -204,7 +204,7 @@ class ReconstructionPolicyTest : public ::testing::Test { [this](const TaskID &task_id, const ObjectID &obj) { TriggerReconstruction(task_id); }, - reconstruction_timeout_ms_, ClientID::FromRandom(), mock_gcs_, + reconstruction_timeout_ms_, NodeID::FromRandom(), mock_gcs_, mock_object_directory_)), timer_canceled_(false) { subscribe_callback_ = [this](const TaskID &task_id, @@ -298,7 +298,7 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionSimple) { TEST_F(ReconstructionPolicyTest, TestReconstructionEvicted) { TaskID task_id = ForNormalTask(); ObjectID object_id = ObjectID::FromIndex(task_id, /*index=*/1); - mock_object_directory_->SetObjectLocations(object_id, {ClientID::FromRandom()}); + mock_object_directory_->SetObjectLocations(object_id, {NodeID::FromRandom()}); // Listen for both objects. reconstruction_policy_->ListenAndMaybeReconstruct(object_id, rpc::Address()); @@ -310,7 +310,7 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionEvicted) { // Simulate evicting one of the objects. mock_object_directory_->SetObjectLocations(object_id, - std::unordered_set()); + std::unordered_set()); // Run the test again. 
Run(reconstruction_timeout_ms_ * 1.1); // Check that reconstruction was triggered, since one of the objects was @@ -321,7 +321,7 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionEvicted) { TEST_F(ReconstructionPolicyTest, TestReconstructionObjectLost) { TaskID task_id = ForNormalTask(); ObjectID object_id = ObjectID::FromIndex(task_id, /*index=*/1); - ClientID client_id = ClientID::FromRandom(); + NodeID client_id = NodeID::FromRandom(); mock_object_directory_->SetObjectLocations(object_id, {client_id}); // Listen for both objects. @@ -370,7 +370,7 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionSuppressed) { // Acquire the task lease for a period longer than the test period. auto task_lease_data = std::make_shared(); - task_lease_data->set_node_manager_id(ClientID::FromRandom().Binary()); + task_lease_data->set_node_manager_id(NodeID::FromRandom().Binary()); task_lease_data->set_acquired_at(absl::GetCurrentTimeNanos() / 1000000); task_lease_data->set_timeout(2 * test_period); task_lease_data->set_task_id(task_id.Binary()); @@ -398,7 +398,7 @@ TEST_F(ReconstructionPolicyTest, TestReconstructionContinuallySuppressed) { // Send the reconstruction manager heartbeats about the object. SetPeriodicTimer(reconstruction_timeout_ms_ / 2, [this, task_id]() { auto task_lease_data = std::make_shared(); - task_lease_data->set_node_manager_id(ClientID::FromRandom().Binary()); + task_lease_data->set_node_manager_id(NodeID::FromRandom().Binary()); task_lease_data->set_acquired_at(absl::GetCurrentTimeNanos() / 1000000); task_lease_data->set_timeout(reconstruction_timeout_ms_); task_lease_data->set_task_id(task_id.Binary()); @@ -452,7 +452,7 @@ TEST_F(ReconstructionPolicyTest, TestSimultaneousReconstructionSuppressed) { // reconstruction. 
auto task_reconstruction_data = std::make_shared(); task_reconstruction_data->set_task_id(task_id.Binary()); - task_reconstruction_data->set_node_manager_id(ClientID::FromRandom().Binary()); + task_reconstruction_data->set_node_manager_id(NodeID::FromRandom().Binary()); task_reconstruction_data->set_num_reconstructions(0); RAY_CHECK_OK(mock_gcs_->Tasks().AttemptTaskReconstruction( task_reconstruction_data, diff --git a/src/ray/raylet/scheduling/cluster_task_manager.cc b/src/ray/raylet/scheduling/cluster_task_manager.cc index bd0762ab8..349bf76b6 100644 --- a/src/ray/raylet/scheduling/cluster_task_manager.cc +++ b/src/ray/raylet/scheduling/cluster_task_manager.cc @@ -6,7 +6,7 @@ namespace ray { namespace raylet { ClusterTaskManager::ClusterTaskManager( - const ClientID &self_node_id, + const NodeID &self_node_id, std::shared_ptr cluster_resource_scheduler, std::function fulfills_dependencies_func, NodeInfoGetter get_node_info) @@ -50,7 +50,7 @@ bool ClusterTaskManager::SchedulePendingTasks() { cluster_resource_scheduler_->AllocateRemoteTaskResources(node_id_string, request_resources); - ClientID node_id = ClientID::FromBinary(node_id_string); + NodeID node_id = NodeID::FromBinary(node_id_string); auto node_info_opt = get_node_info_(node_id); // gcs_client_->Nodes().Get(node_id); RAY_CHECK(node_info_opt) @@ -260,7 +260,7 @@ void ClusterTaskManager::Dispatch( send_reply_callback(); } -void ClusterTaskManager::Spillback(ClientID spillback_to, std::string address, int port, +void ClusterTaskManager::Spillback(NodeID spillback_to, std::string address, int port, rpc::RequestWorkerLeaseReply *reply, std::function send_reply_callback) { reply->mutable_retry_at_raylet_address()->set_ip_address(address); diff --git a/src/ray/raylet/scheduling/cluster_task_manager.h b/src/ray/raylet/scheduling/cluster_task_manager.h index a53764f6d..d72852121 100644 --- a/src/ray/raylet/scheduling/cluster_task_manager.h +++ b/src/ray/raylet/scheduling/cluster_task_manager.h @@ -2,7 +2,6 @@ 
#include "absl/container/flat_hash_map.h" #include "absl/container/flat_hash_set.h" - #include "ray/common/task/task.h" #include "ray/common/task/task_common.h" #include "ray/raylet/scheduling/cluster_resource_scheduler.h" @@ -20,7 +19,7 @@ namespace raylet { /// dispatch/spillback and the callback to trigger it. typedef std::tuple> Work; -typedef std::function(const ClientID &node_id)> +typedef std::function(const NodeID &node_id)> NodeInfoGetter; /// Manages the queuing and dispatching of tasks. The logic is as follows: @@ -50,7 +49,7 @@ class ClusterTaskManager { /// \param fulfills_dependencies_func: Returns true if all of a task's /// dependencies are fulfilled. /// \param gcs_client: A gcs client. - ClusterTaskManager(const ClientID &self_node_id, + ClusterTaskManager(const NodeID &self_node_id, std::shared_ptr cluster_resource_scheduler, std::function fulfills_dependencies_func, NodeInfoGetter get_node_info); @@ -100,7 +99,7 @@ class ClusterTaskManager { std::string DebugString(); private: - const ClientID &self_node_id_; + const NodeID &self_node_id_; std::shared_ptr cluster_resource_scheduler_; std::function fulfills_dependencies_func_; NodeInfoGetter get_node_info_; @@ -125,7 +124,7 @@ class ClusterTaskManager { const TaskSpecification &task_spec, rpc::RequestWorkerLeaseReply *reply, std::function send_reply_callback); - void Spillback(ClientID spillback_to, std::string address, int port, + void Spillback(NodeID spillback_to, std::string address, int port, rpc::RequestWorkerLeaseReply *reply, std::function send_reply_callback); }; diff --git a/src/ray/raylet/scheduling/cluster_task_manager_test.cc b/src/ray/raylet/scheduling/cluster_task_manager_test.cc index d09fe612b..68509a772 100644 --- a/src/ray/raylet/scheduling/cluster_task_manager_test.cc +++ b/src/ray/raylet/scheduling/cluster_task_manager_test.cc @@ -282,7 +282,7 @@ Task CreateTask(const std::unordered_map &required_resource class ClusterTaskManagerTest : public ::testing::Test { public: 
ClusterTaskManagerTest() - : id_(ClientID::FromRandom()), + : id_(NodeID::FromRandom()), single_node_resource_scheduler_(CreateSingleNodeScheduler(id_.Binary())), fulfills_dependencies_calls_(0), dependencies_fulfilled_(true), @@ -293,7 +293,7 @@ class ClusterTaskManagerTest : public ::testing::Test { fulfills_dependencies_calls_++; return dependencies_fulfilled_; }, - [this](const ClientID &node_id) { + [this](const NodeID &node_id) { node_info_calls_++; return node_info_; }) {} @@ -302,7 +302,7 @@ class ClusterTaskManagerTest : public ::testing::Test { void Shutdown() {} - ClientID id_; + NodeID id_; std::shared_ptr single_node_resource_scheduler_; MockWorkerPool pool_; std::unordered_map> leased_workers_; diff --git a/src/ray/raylet/scheduling_policy.cc b/src/ray/raylet/scheduling_policy.cc index cd6d33b9b..a8fcb2c1c 100644 --- a/src/ray/raylet/scheduling_policy.cc +++ b/src/ray/raylet/scheduling_policy.cc @@ -28,16 +28,16 @@ SchedulingPolicy::SchedulingPolicy(const SchedulingQueue &scheduling_queue) : scheduling_queue_(scheduling_queue), gen_(std::chrono::high_resolution_clock::now().time_since_epoch().count()) {} -std::unordered_map SchedulingPolicy::Schedule( - std::unordered_map &cluster_resources, - const ClientID &local_client_id) { +std::unordered_map SchedulingPolicy::Schedule( + std::unordered_map &cluster_resources, + const NodeID &local_client_id) { // The policy decision to be returned. 
- std::unordered_map decision; + std::unordered_map decision; #ifndef NDEBUG RAY_LOG(DEBUG) << "Cluster resource map: "; for (const auto &client_resource_pair : cluster_resources) { - // pair = ClientID, SchedulingResources - const ClientID &client_id = client_resource_pair.first; + // pair = NodeID, SchedulingResources + const NodeID &client_id = client_resource_pair.first; const SchedulingResources &resources = client_resource_pair.second; RAY_LOG(DEBUG) << "client_id: " << client_id << " " << resources.GetAvailableResources().ToString(); @@ -74,10 +74,10 @@ std::unordered_map SchedulingPolicy::Schedule( // Construct a set of viable node candidates and randomly pick between them. // Get all the client id keys and randomly pick. - std::vector client_keys; + std::vector client_keys; for (const auto &client_resource_pair : cluster_resources) { - // pair = ClientID, SchedulingResources - ClientID node_client_id = client_resource_pair.first; + // pair = NodeID, SchedulingResources + NodeID node_client_id = client_resource_pair.first; const auto &node_resources = client_resource_pair.second; ResourceSet available_node_resources = ResourceSet(node_resources.GetAvailableResources()); @@ -102,7 +102,7 @@ std::unordered_map SchedulingPolicy::Schedule( // TODO(atumanov): change uniform random to discrete, weighted by resource capacity. std::uniform_int_distribution distribution(0, client_keys.size() - 1); int client_key_index = distribution(gen_); - const ClientID &dst_client_id = client_keys[client_key_index]; + const NodeID &dst_client_id = client_keys[client_key_index]; decision[task_id] = dst_client_id; // Update dst_client_id's load to keep track of remote task load until // the next heartbeat. @@ -112,8 +112,8 @@ std::unordered_map SchedulingPolicy::Schedule( } else { // If the task doesn't fit, place randomly subject to hard constraints. 
for (const auto &client_resource_pair2 : cluster_resources) { - // pair = ClientID, SchedulingResources - ClientID node_client_id = client_resource_pair2.first; + // pair = NodeID, SchedulingResources + NodeID node_client_id = client_resource_pair2.first; const auto &node_resources = client_resource_pair2.second; if (resource_demand.IsSubset(node_resources.GetTotalResources())) { // This node is a feasible candidate. @@ -128,7 +128,7 @@ std::unordered_map SchedulingPolicy::Schedule( // capacity. std::uniform_int_distribution distribution(0, client_keys.size() - 1); int client_key_index = distribution(gen_); - const ClientID &dst_client_id = client_keys[client_key_index]; + const NodeID &dst_client_id = client_keys[client_key_index]; decision[task_id] = dst_client_id; // Update dst_client_id's load to keep track of remote task load until // the next heartbeat. @@ -152,12 +152,12 @@ std::unordered_map SchedulingPolicy::Schedule( } bool SchedulingPolicy::ScheduleBundle( - std::unordered_map &cluster_resources, - const ClientID &local_client_id, const ray::BundleSpecification &bundle_spec) { + std::unordered_map &cluster_resources, + const NodeID &local_client_id, const ray::BundleSpecification &bundle_spec) { #ifndef NDEBUG RAY_LOG(DEBUG) << "Cluster resource map: "; for (const auto &client_resource_pair : cluster_resources) { - const ClientID &client_id = client_resource_pair.first; + const NodeID &client_id = client_resource_pair.first; const SchedulingResources &resources = client_resource_pair.second; RAY_LOG(DEBUG) << "client_id: " << client_id << " " << resources.GetAvailableResources().ToString(); @@ -168,7 +168,7 @@ bool SchedulingPolicy::ScheduleBundle( return false; } const auto &resource_demand = bundle_spec.GetRequiredResources(); - ClientID node_client_id = client_resource_pair->first; + NodeID node_client_id = client_resource_pair->first; const auto &node_resources = client_resource_pair->second; ResourceSet available_node_resources = 
ResourceSet(node_resources.GetAvailableResources()); diff --git a/src/ray/raylet/scheduling_policy.h b/src/ray/raylet/scheduling_policy.h index ea10218de..55e528833 100644 --- a/src/ray/raylet/scheduling_policy.h +++ b/src/ray/raylet/scheduling_policy.h @@ -46,9 +46,9 @@ class SchedulingPolicy { /// \param local_client_id The ID of the node manager that owns this /// SchedulingPolicy object. /// \return Scheduling decision, mapping tasks to raylets for placement. - std::unordered_map Schedule( - std::unordered_map &cluster_resources, - const ClientID &local_client_id); + std::unordered_map Schedule( + std::unordered_map &cluster_resources, + const NodeID &local_client_id); /// \param cluster_resources: a set of cluster resources containing resource and load /// information for some subset of the cluster. @@ -57,9 +57,9 @@ class SchedulingPolicy { /// \param bundle_spec the description of a bundle which include the resource the bundle /// need. \return If this bundle can be scheduled in this node, return true; else return /// false. - bool ScheduleBundle( - std::unordered_map &cluster_resources, - const ClientID &local_client_id, const ray::BundleSpecification &bundle_spec); + bool ScheduleBundle(std::unordered_map &cluster_resources, + const NodeID &local_client_id, + const ray::BundleSpecification &bundle_spec); /// \brief Given a set of cluster resources, try to spillover infeasible tasks. 
/// diff --git a/src/ray/raylet_client/raylet_client.cc b/src/ray/raylet_client/raylet_client.cc index 6859f7557..35b5f5dee 100644 --- a/src/ray/raylet_client/raylet_client.cc +++ b/src/ray/raylet_client/raylet_client.cc @@ -82,7 +82,7 @@ raylet::RayletClient::RayletClient( std::shared_ptr grpc_client, const std::string &raylet_socket, const WorkerID &worker_id, rpc::WorkerType worker_type, const JobID &job_id, const Language &language, - const std::string &ip_address, Status *status, ClientID *raylet_id, int *port, + const std::string &ip_address, Status *status, NodeID *raylet_id, int *port, std::unordered_map *system_config, const std::string &job_config) : grpc_client_(std::move(grpc_client)), @@ -120,7 +120,7 @@ raylet::RayletClient::RayletClient( *status = Status::Invalid(string_from_flatbuf(*reply_message->failure_reason())); return; } - *raylet_id = ClientID::FromBinary(reply_message->raylet_id()->str()); + *raylet_id = NodeID::FromBinary(reply_message->raylet_id()->str()); *port = reply_message->port(); RAY_CHECK(system_config); @@ -306,8 +306,7 @@ Status raylet::RayletClient::NotifyActorResumedFromCheckpoint( } Status raylet::RayletClient::SetResource(const std::string &resource_name, - const double capacity, - const ClientID &client_Id) { + const double capacity, const NodeID &client_Id) { flatbuffers::FlatBufferBuilder fbb; auto message = protocol::CreateSetResourceRequest(fbb, fbb.CreateString(resource_name), capacity, to_flatbuf(fbb, client_Id)); diff --git a/src/ray/raylet_client/raylet_client.h b/src/ray/raylet_client/raylet_client.h index 2dad68da1..8176def5b 100644 --- a/src/ray/raylet_client/raylet_client.h +++ b/src/ray/raylet_client/raylet_client.h @@ -28,8 +28,8 @@ using ray::ActorCheckpointID; using ray::ActorID; -using ray::ClientID; using ray::JobID; +using ray::NodeID; using ray::ObjectID; using ray::TaskID; using ray::WorkerID; @@ -182,7 +182,7 @@ class RayletClient : public PinObjectsInterface, /// \param language Language of the worker. 
/// \param ip_address The IP address of the worker. /// \param status This will be populated with the result of connection attempt. - /// \param raylet_id This will be populated with the local raylet's ClientID. + /// \param raylet_id This will be populated with the local raylet's NodeID. /// \param system_config This will be populated with internal config parameters /// provided by the raylet. /// \param port The port that the worker should listen on for gRPC requests. If @@ -191,7 +191,7 @@ class RayletClient : public PinObjectsInterface, std::shared_ptr grpc_client, const std::string &raylet_socket, const WorkerID &worker_id, rpc::WorkerType worker_type, const JobID &job_id, const Language &language, - const std::string &ip_address, Status *status, ClientID *raylet_id, + const std::string &ip_address, Status *status, NodeID *raylet_id, int *port, std::unordered_map *system_config, const std::string &job_config); @@ -328,10 +328,10 @@ class RayletClient : public PinObjectsInterface, /// Sets a resource with the specified capacity and client id /// \param resource_name Name of the resource to be set /// \param capacity Capacity of the resource - /// \param client_Id ClientID where the resource is to be set + /// \param client_Id NodeID where the resource is to be set /// \return ray::Status ray::Status SetResource(const std::string &resource_name, const double capacity, - const ray::ClientID &client_Id); + const ray::NodeID &client_Id); /// Ask the raylet to spill an object to external storage. /// \param object_id The ID of the object to be spilled. 
diff --git a/src/ray/rpc/worker/core_worker_client.h b/src/ray/rpc/worker/core_worker_client.h index cfb68df93..6a2069845 100644 --- a/src/ray/rpc/worker/core_worker_client.h +++ b/src/ray/rpc/worker/core_worker_client.h @@ -63,7 +63,7 @@ class WorkerAddress { : ip_address(address.ip_address()), port(address.port()), worker_id(WorkerID::FromBinary(address.worker_id())), - raylet_id(ClientID::FromBinary(address.raylet_id())) {} + raylet_id(NodeID::FromBinary(address.raylet_id())) {} template friend H AbslHashValue(H h, const WorkerAddress &w) { return H::combine(std::move(h), w.ip_address, w.port, w.worker_id, w.raylet_id); @@ -90,7 +90,7 @@ class WorkerAddress { /// The unique id of the worker. const WorkerID worker_id; /// The unique id of the worker raylet. - const ClientID raylet_id; + const NodeID raylet_id; }; typedef std::function(const rpc::Address &)>