[Refactor] Rename ClientId to NodeId (#10992)

* rename ClientId to NodeId

* format lint

* format lint

* fix conflicts

* rename new ClientId to NodeId

* update lint

* make same version of clang-format with travis ci
This commit is contained in:
DK.Pino
2020-09-28 01:24:21 +08:00
committed by GitHub
parent f69b390755
commit db7097fb1f
111 changed files with 862 additions and 889 deletions
+2 -2
View File
@@ -66,7 +66,7 @@ from ray._raylet import (
ActorCheckpointID,
ActorClassID,
ActorID,
ClientID,
NodeID,
Config as _Config,
JobID,
WorkerID,
@@ -151,7 +151,7 @@ __all__ += [
"ActorCheckpointID",
"ActorClassID",
"ActorID",
"ClientID",
"NodeID",
"JobID",
"WorkerID",
"FunctionID",
+4 -4
View File
@@ -76,7 +76,7 @@ from ray.includes.unique_ids cimport (
CActorID,
CActorCheckpointID,
CObjectID,
CClientID,
CNodeID,
CPlacementGroupID,
)
from ray.includes.libcoreworker cimport (
@@ -784,7 +784,7 @@ cdef class CoreWorker:
CCoreWorkerProcess.GetCoreWorker().GetCurrentJobId().Binary())
def get_current_node_id(self):
return ClientID(
return NodeID(
CCoreWorkerProcess.GetCoreWorker().GetCurrentNodeId().Binary())
def get_actor_id(self):
@@ -1479,10 +1479,10 @@ cdef class CoreWorker:
actor_id.native(), checkpoint_id.native()))
def set_resource(self, basestring resource_name,
double capacity, ClientID client_id):
double capacity, NodeID client_id):
CCoreWorkerProcess.GetCoreWorker().SetResource(
resource_name.encode("ascii"), capacity,
CClientID.FromBinary(client_id.binary()))
CNodeID.FromBinary(client_id.binary()))
def force_spill_objects(self, object_refs):
cdef c_vector[CObjectID] object_ids
+5 -5
View File
@@ -4,17 +4,17 @@ import ray
def set_resource(resource_name, capacity, client_id=None):
""" Set a resource to a specified capacity.
This creates, updates or deletes a custom resource for a target clientId.
This creates, updates or deletes a custom resource for a target NodeID.
If the resource already exists, its capacity is updated to the new value.
If the capacity is set to 0, the resource is deleted.
If ClientID is not specified or set to None,
If NodeID is not specified or set to None,
the resource is created on the local client where the actor is running.
Args:
resource_name (str): Name of the resource to be created
capacity (int): Capacity of the new resource. Resource is deleted if
capacity is 0.
client_id (str): The ClientId of the node where the resource is to be
client_id (str): The NodeID of the node where the resource is to be
set.
Returns:
@@ -25,9 +25,9 @@ def set_resource(resource_name, capacity, client_id=None):
specified.
"""
if client_id is not None:
client_id_obj = ray.ClientID(ray.utils.hex_to_binary(client_id))
client_id_obj = ray.NodeID(ray.utils.hex_to_binary(client_id))
else:
client_id_obj = ray.ClientID.nil()
client_id_obj = ray.NodeID.nil()
if (capacity < 0) or (capacity != int(capacity)):
raise ValueError(
"Capacity {} must be a non-negative integer.".format(capacity))
@@ -4,7 +4,7 @@ from libcpp.vector cimport vector as c_vector
from libcpp.memory cimport unique_ptr
from ray.includes.unique_ids cimport (
CActorID,
CClientID,
CNodeID,
CObjectID,
CWorkerID,
CPlacementGroupID,
@@ -24,7 +24,7 @@ cdef extern from "ray/gcs/gcs_client/global_state_accessor.h" nogil:
unique_ptr[c_string] GetObjectInfo(const CObjectID &object_id)
c_vector[c_string] GetAllActorInfo()
unique_ptr[c_string] GetActorInfo(const CActorID &actor_id)
c_string GetNodeResourceInfo(const CClientID &node_id)
c_string GetNodeResourceInfo(const CNodeID &node_id)
unique_ptr[c_string] GetWorkerInfo(const CWorkerID &worker_id)
c_vector[c_string] GetAllWorkerInfo()
c_bool AddWorkerInfo(const c_string &serialized_string)
@@ -1,6 +1,6 @@
from ray.includes.unique_ids cimport (
CActorID,
CClientID,
CNodeID,
CObjectID,
CWorkerID,
CPlacementGroupID
@@ -89,7 +89,7 @@ cdef class GlobalStateAccessor:
def get_node_resource_info(self, node_id):
cdef c_string result
cdef CClientID cnode_id = CClientID.FromBinary(node_id.binary())
cdef CNodeID cnode_id = CNodeID.FromBinary(node_id.binary())
with nogil:
result = self.inner.get().GetNodeResourceInfo(cnode_id)
return result
+3 -3
View File
@@ -14,7 +14,7 @@ from libcpp.vector cimport vector as c_vector
from ray.includes.unique_ids cimport (
CActorID,
CActorCheckpointID,
CClientID,
CNodeID,
CJobID,
CTaskID,
CObjectID,
@@ -121,7 +121,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
CJobID GetCurrentJobId()
CTaskID GetCurrentTaskId()
CClientID GetCurrentNodeId()
CNodeID GetCurrentNodeId()
CPlacementGroupID GetCurrentPlacementGroupId()
const CActorID &GetActorId()
void SetActorTitle(const c_string &title)
@@ -196,7 +196,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
const CActorID &actor_id, const CActorCheckpointID &checkpoint_id)
CRayStatus SetResource(const c_string &resource_name,
const double capacity,
const CClientID &client_Id)
const CNodeID &client_Id)
CRayStatus SpillObjects(const c_vector[CObjectID] &object_ids)
CRayStatus ForceRestoreSpilledObjects(
const c_vector[CObjectID] &object_ids)
+2 -2
View File
@@ -65,10 +65,10 @@ cdef extern from "ray/common/id.h" namespace "ray" nogil:
CActorID Of(CJobID job_id, CTaskID parent_task_id,
int64_t parent_task_counter)
cdef cppclass CClientID "ray::ClientID"(CUniqueID):
cdef cppclass CNodeID "ray::NodeID"(CUniqueID):
@staticmethod
CClientID FromBinary(const c_string &binary)
CNodeID FromBinary(const c_string &binary)
cdef cppclass CConfigID "ray::ConfigID"(CUniqueID):
+6 -6
View File
@@ -12,7 +12,7 @@ from ray.includes.unique_ids cimport (
CActorCheckpointID,
CActorClassID,
CActorID,
CClientID,
CNodeID,
CConfigID,
CJobID,
CFunctionID,
@@ -199,14 +199,14 @@ cdef class TaskID(BaseID):
CTaskID.FromBinary(parent_task_id.binary()),
parent_task_counter).Binary())
cdef class ClientID(UniqueID):
cdef class NodeID(UniqueID):
def __init__(self, id):
check_id(id)
self.data = CClientID.FromBinary(<c_string>id)
self.data = CNodeID.FromBinary(<c_string>id)
cdef CClientID native(self):
return <CClientID>self.data
cdef CNodeID native(self):
return <CNodeID>self.data
cdef class JobID(BaseID):
@@ -373,7 +373,7 @@ _ID_TYPES = [
ActorCheckpointID,
ActorClassID,
ActorID,
ClientID,
NodeID,
JobID,
WorkerID,
FunctionID,
+1 -1
View File
@@ -266,7 +266,7 @@ class GlobalState:
"""
self._check_connected()
node_id = ray.ClientID(hex_to_binary(node_id))
node_id = ray.NodeID(hex_to_binary(node_id))
node_resource_bytes = \
self.global_state_accessor.get_node_resource_info(node_id)
if node_resource_bytes is None:
+1 -1
View File
@@ -23,4 +23,4 @@ DEFINE_UNIQUE_ID(ActorClassID)
DEFINE_UNIQUE_ID(ActorCheckpointID)
DEFINE_UNIQUE_ID(WorkerID)
DEFINE_UNIQUE_ID(ConfigID)
DEFINE_UNIQUE_ID(ClientID)
DEFINE_UNIQUE_ID(NodeID)
+1 -1
View File
@@ -13,7 +13,7 @@ typedef std::function<void(const std::shared_ptr<void>, const std::string &, int
DispatchTaskCallback;
/// Arguments are the raylet ID to spill back to, the raylet's
/// address and the raylet's port.
typedef std::function<void(const ClientID &, const std::string &, int)>
typedef std::function<void(const NodeID &, const std::string &, int)>
SpillbackTaskCallback;
typedef std::function<void()> CancelTaskCallback;
+1 -2
View File
@@ -129,8 +129,7 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
<< ", ip address: " << actor_data.address().ip_address()
<< ", port: " << actor_data.address().port() << ", worker_id: "
<< WorkerID::FromBinary(actor_data.address().worker_id())
<< ", raylet_id: "
<< ClientID::FromBinary(actor_data.address().raylet_id())
<< ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id())
<< ", num_restarts: " << actor_data.num_restarts();
if (actor_data.state() == gcs::ActorTableData::RESTARTING) {
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false);
+12 -12
View File
@@ -302,7 +302,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
auto grpc_client = rpc::NodeManagerWorkerClient::make(
options_.raylet_ip_address, options_.node_manager_port, *client_call_manager_);
Status raylet_client_status;
ClientID local_raylet_id;
NodeID local_raylet_id;
int assigned_port;
std::unordered_map<std::string, std::string> system_config;
local_raylet_client_ = std::shared_ptr<raylet::RayletClient>(new raylet::RayletClient(
@@ -352,7 +352,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
RegisterToGcs();
// Register a callback to monitor removed nodes.
auto on_node_change = [this](const ClientID &node_id, const rpc::GcsNodeInfo &data) {
auto on_node_change = [this](const NodeID &node_id, const rpc::GcsNodeInfo &data) {
if (data.state() == rpc::GcsNodeInfo::DEAD) {
OnNodeRemoved(data);
}
@@ -395,7 +395,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
options_.ref_counting_enabled ? reference_counter_ : nullptr, local_raylet_client_,
options_.check_signals));
auto check_node_alive_fn = [this](const ClientID &node_id) {
auto check_node_alive_fn = [this](const NodeID &node_id) {
auto node = gcs_client_->Nodes().Get(node_id);
if (!node) {
return false;
@@ -500,7 +500,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
RAY_CHECK_OK(status);
std::vector<rpc::Address> locations;
for (const auto &result : results) {
const auto &node_id = ClientID::FromBinary(result.manager());
const auto &node_id = NodeID::FromBinary(result.manager());
auto node = gcs_client_->Nodes().Get(node_id);
RAY_CHECK(node.has_value());
if (node->state() == rpc::GcsNodeInfo::ALIVE) {
@@ -617,7 +617,7 @@ void CoreWorker::RunIOService() {
}
void CoreWorker::OnNodeRemoved(const rpc::GcsNodeInfo &node_info) {
const auto node_id = ClientID::FromBinary(node_info.node_id());
const auto node_id = NodeID::FromBinary(node_info.node_id());
RAY_LOG(INFO) << "Node failure " << node_id;
const auto lost_objects = reference_counter_->ResetObjectsOnRemovedNode(node_id);
// Delete the objects from the in-memory store to indicate that they are not
@@ -825,7 +825,7 @@ Status CoreWorker::Put(const RayObject &object,
worker_context_.GetNextPutIndex());
reference_counter_->AddOwnedObject(
*object_id, contained_object_ids, rpc_address_, CurrentCallSite(), object.GetSize(),
/*is_reconstructable=*/false, ClientID::FromBinary(rpc_address_.raylet_id()));
/*is_reconstructable=*/false, NodeID::FromBinary(rpc_address_.raylet_id()));
return Put(object, contained_object_ids, *object_id, /*pin_object=*/true);
}
@@ -884,7 +884,7 @@ Status CoreWorker::Create(const std::shared_ptr<Buffer> &metadata, const size_t
reference_counter_->AddOwnedObject(*object_id, contained_object_ids, rpc_address_,
CurrentCallSite(), data_size + metadata->Size(),
/*is_reconstructable=*/false,
ClientID::FromBinary(rpc_address_.raylet_id()));
NodeID::FromBinary(rpc_address_.raylet_id()));
}
return Status::OK();
}
@@ -1153,7 +1153,7 @@ Status CoreWorker::NotifyActorResumedFromCheckpoint(
}
Status CoreWorker::SetResource(const std::string &resource_name, const double capacity,
const ClientID &client_id) {
const NodeID &client_id) {
return local_raylet_client_->SetResource(resource_name, capacity, client_id);
}
@@ -1167,7 +1167,7 @@ void CoreWorker::SpillOwnedObject(const ObjectID &object_id,
}
// Find the raylet that hosts the primary copy of the object.
ClientID pinned_at;
NodeID pinned_at;
RAY_CHECK(reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at));
auto node = gcs_client_->Nodes().Get(pinned_at);
if (pinned_at.IsNil() || !node) {
@@ -2023,7 +2023,7 @@ void CoreWorker::HandleAddObjectLocationOwner(
return;
}
reference_counter_->AddObjectLocation(ObjectID::FromBinary(request.object_id()),
ClientID::FromBinary(request.client_id()));
NodeID::FromBinary(request.client_id()));
send_reply_callback(Status::OK(), nullptr, nullptr);
}
@@ -2036,7 +2036,7 @@ void CoreWorker::HandleRemoveObjectLocationOwner(
return;
}
reference_counter_->RemoveObjectLocation(ObjectID::FromBinary(request.object_id()),
ClientID::FromBinary(request.client_id()));
NodeID::FromBinary(request.client_id()));
send_reply_callback(Status::OK(), nullptr, nullptr);
}
@@ -2048,7 +2048,7 @@ void CoreWorker::HandleGetObjectLocationsOwner(
send_reply_callback)) {
return;
}
std::unordered_set<ClientID> client_ids =
std::unordered_set<NodeID> client_ids =
reference_counter_->GetObjectLocations(ObjectID::FromBinary(request.object_id()));
for (const auto &client_id : client_ids) {
reply->add_client_ids(client_id.Binary());
+3 -5
View File
@@ -350,9 +350,7 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
const JobID &GetCurrentJobId() const { return worker_context_.GetCurrentJobID(); }
ClientID GetCurrentNodeId() const {
return ClientID::FromBinary(rpc_address_.raylet_id());
}
NodeID GetCurrentNodeId() const { return NodeID::FromBinary(rpc_address_.raylet_id()); }
const PlacementGroupID &GetCurrentPlacementGroupId() const {
return worker_context_.GetCurrentPlacementGroupId();
@@ -617,10 +615,10 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
/// Sets a resource with the specified capacity and client id
/// \param[in] resource_name Name of the resource to be set.
/// \param[in] capacity Capacity of the resource.
/// \param[in] client_Id ClientID where the resource is to be set.
/// \param[in] client_id NodeID where the resource is to be set.
/// \return Status
Status SetResource(const std::string &resource_name, const double capacity,
const ClientID &client_id);
const NodeID &client_id);
/// Request an object to be spilled to external storage.
/// \param[in] object_ids The objects to be spilled.
@@ -252,7 +252,7 @@ JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeShutdown(JNIEn
JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeSetResource(
JNIEnv *env, jclass, jstring resourceName, jdouble capacity, jbyteArray nodeId) {
const auto node_id = JavaByteArrayToId<ClientID>(env, nodeId);
const auto node_id = JavaByteArrayToId<NodeID>(env, nodeId);
const char *native_resource_name = env->GetStringUTFChars(resourceName, JNI_FALSE);
auto status = ray::CoreWorkerProcess::GetCoreWorker().SetResource(
@@ -76,7 +76,7 @@ Java_io_ray_runtime_gcs_GlobalStateAccessor_nativeGetNodeResourceInfo(
JNIEnv *env, jobject o, jlong gcs_accessor_ptr, jbyteArray node_id_bytes) {
auto *gcs_accessor =
reinterpret_cast<ray::gcs::GlobalStateAccessor *>(gcs_accessor_ptr);
auto node_id = JavaByteArrayToId<ray::ClientID>(env, node_id_bytes);
auto node_id = JavaByteArrayToId<ray::NodeID>(env, node_id_bytes);
auto node_resource_info = gcs_accessor->GetNodeResourceInfo(node_id);
return static_cast<jbyteArray>(NativeStringToJavaByteArray(env, node_resource_info));
}
@@ -26,7 +26,7 @@
extern "C" {
#endif
using ray::ClientID;
using ray::NodeID;
JNIEXPORT jbyteArray JNICALL
Java_io_ray_runtime_task_NativeTaskExecutor_nativePrepareCheckpoint(JNIEnv *env, jclass) {
@@ -20,7 +20,7 @@ namespace ray {
Status ObjectRecoveryManager::RecoverObject(const ObjectID &object_id) {
// Check the ReferenceCounter to see if there is a location for the object.
ClientID pinned_at;
NodeID pinned_at;
bool owned_by_us = reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at);
if (!owned_by_us) {
return Status::Invalid(
@@ -80,12 +80,12 @@ void ObjectRecoveryManager::PinExistingObjectCopy(
const std::vector<rpc::Address> &other_locations) {
// If a copy still exists, pin the object by sending a
// PinObjectIDs RPC.
const auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
const auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
RAY_LOG(DEBUG) << "Trying to pin copy of lost object " << object_id << " at node "
<< node_id;
std::shared_ptr<PinObjectsInterface> client;
if (node_id == ClientID::FromBinary(rpc_address_.raylet_id())) {
if (node_id == NodeID::FromBinary(rpc_address_.raylet_id())) {
client = local_object_pinning_client_;
} else {
absl::MutexLock lock(&mu_);
@@ -137,7 +137,7 @@ class ObjectRecoveryManager {
mutable absl::Mutex mu_;
/// Cache of gRPC clients to remote raylets for pinning objects.
absl::flat_hash_map<ClientID, std::shared_ptr<PinObjectsInterface>>
absl::flat_hash_map<NodeID, std::shared_ptr<PinObjectsInterface>>
remote_object_pinning_clients_ GUARDED_BY(mu_);
/// Objects that are currently pending recovery. Calls to RecoverObject for
+16 -15
View File
@@ -144,11 +144,12 @@ void ReferenceCounter::AddObjectRefStats(
}
}
void ReferenceCounter::AddOwnedObject(
const ObjectID &object_id, const std::vector<ObjectID> &inner_ids,
const rpc::Address &owner_address, const std::string &call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<ClientID> &pinned_at_raylet_id) {
void ReferenceCounter::AddOwnedObject(const ObjectID &object_id,
const std::vector<ObjectID> &inner_ids,
const rpc::Address &owner_address,
const std::string &call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<NodeID> &pinned_at_raylet_id) {
RAY_LOG(DEBUG) << "Adding owned object " << object_id;
absl::MutexLock lock(&mutex_);
RAY_CHECK(object_id_refs_.count(object_id) == 0)
@@ -504,12 +505,12 @@ bool ReferenceCounter::SetDeleteCallback(
}
std::vector<ObjectID> ReferenceCounter::ResetObjectsOnRemovedNode(
const ClientID &raylet_id) {
const NodeID &raylet_id) {
absl::MutexLock lock(&mutex_);
std::vector<ObjectID> lost_objects;
for (auto it = object_id_refs_.begin(); it != object_id_refs_.end(); it++) {
const auto &object_id = it->first;
if (it->second.pinned_at_raylet_id.value_or(ClientID::Nil()) == raylet_id) {
if (it->second.pinned_at_raylet_id.value_or(NodeID::Nil()) == raylet_id) {
lost_objects.push_back(object_id);
ReleasePlasmaObject(it);
}
@@ -518,7 +519,7 @@ std::vector<ObjectID> ReferenceCounter::ResetObjectsOnRemovedNode(
}
void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id,
const ClientID &raylet_id) {
const NodeID &raylet_id) {
absl::MutexLock lock(&mutex_);
auto it = object_id_refs_.find(object_id);
if (it != object_id_refs_.end()) {
@@ -539,12 +540,12 @@ void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id,
}
bool ReferenceCounter::IsPlasmaObjectPinned(const ObjectID &object_id,
ClientID *pinned_at) const {
NodeID *pinned_at) const {
absl::MutexLock lock(&mutex_);
auto it = object_id_refs_.find(object_id);
if (it != object_id_refs_.end()) {
if (it->second.owned_by_us) {
*pinned_at = it->second.pinned_at_raylet_id.value_or(ClientID::Nil());
*pinned_at = it->second.pinned_at_raylet_id.value_or(NodeID::Nil());
return true;
}
}
@@ -890,29 +891,29 @@ void ReferenceCounter::SetReleaseLineageCallback(
}
void ReferenceCounter::AddObjectLocation(const ObjectID &object_id,
const ClientID &node_id) {
const NodeID &node_id) {
absl::MutexLock lock(&mutex_);
auto it = object_id_locations_.find(object_id);
if (it == object_id_locations_.end()) {
it = object_id_locations_.emplace(object_id, absl::flat_hash_set<ClientID>()).first;
it = object_id_locations_.emplace(object_id, absl::flat_hash_set<NodeID>()).first;
}
it->second.insert(node_id);
}
void ReferenceCounter::RemoveObjectLocation(const ObjectID &object_id,
const ClientID &node_id) {
const NodeID &node_id) {
absl::MutexLock lock(&mutex_);
auto it = object_id_locations_.find(object_id);
RAY_CHECK(it != object_id_locations_.end());
it->second.erase(node_id);
}
std::unordered_set<ClientID> ReferenceCounter::GetObjectLocations(
std::unordered_set<NodeID> ReferenceCounter::GetObjectLocations(
const ObjectID &object_id) {
absl::MutexLock lock(&mutex_);
auto it = object_id_locations_.find(object_id);
RAY_CHECK(it != object_id_locations_.end());
std::unordered_set<ClientID> locations;
std::unordered_set<NodeID> locations;
for (const auto &location : it->second) {
locations.insert(location);
}
+15 -17
View File
@@ -36,13 +36,11 @@ class ReferenceCounterInterface {
const std::string &call_site) = 0;
virtual bool AddBorrowedObject(const ObjectID &object_id, const ObjectID &outer_id,
const rpc::Address &owner_address) = 0;
virtual void AddOwnedObject(const ObjectID &object_id,
const std::vector<ObjectID> &contained_ids,
const rpc::Address &owner_address,
const std::string &call_site, const int64_t object_size,
bool is_reconstructable,
const absl::optional<ClientID> &pinned_at_raylet_id =
absl::optional<ClientID>()) = 0;
virtual void AddOwnedObject(
const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
const rpc::Address &owner_address, const std::string &call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<NodeID> &pinned_at_raylet_id = absl::optional<NodeID>()) = 0;
virtual bool SetDeleteCallback(
const ObjectID &object_id,
const std::function<void(const ObjectID &)> callback) = 0;
@@ -169,7 +167,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
const rpc::Address &owner_address, const std::string &call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<ClientID> &pinned_at_raylet_id = absl::optional<ClientID>())
const absl::optional<NodeID> &pinned_at_raylet_id = absl::optional<NodeID>())
LOCKS_EXCLUDED(mutex_);
/// Update the size of the object.
@@ -321,7 +319,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
///
/// \param[in] object_id The object to update.
/// \param[in] raylet_id The raylet that is now pinning the object ID.
void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const ClientID &raylet_id)
void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const NodeID &raylet_id)
LOCKS_EXCLUDED(mutex_);
/// Check whether the object is pinned at a remote plasma store node.
@@ -332,7 +330,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
/// \return True if the object exists and is owned by us, false otherwise. We
/// return false here because a borrower should not know the pinned location
/// for an object.
bool IsPlasmaObjectPinned(const ObjectID &object_id, ClientID *pinned_at) const
bool IsPlasmaObjectPinned(const ObjectID &object_id, NodeID *pinned_at) const
LOCKS_EXCLUDED(mutex_);
/// Get and reset the objects that were pinned on the given node. This
@@ -342,7 +340,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
///
/// \param[in] node_id The node whose object store has been removed.
/// \return The set of objects that were pinned on the given node.
std::vector<ObjectID> ResetObjectsOnRemovedNode(const ClientID &raylet_id);
std::vector<ObjectID> ResetObjectsOnRemovedNode(const NodeID &raylet_id);
/// Whether we have a reference to a particular ObjectID.
///
@@ -361,21 +359,21 @@ class ReferenceCounter : public ReferenceCounterInterface {
///
/// \param[in] object_id The object to update.
/// \param[in] node_id The node to be added to the location table.
void AddObjectLocation(const ObjectID &object_id, const ClientID &node_id)
void AddObjectLocation(const ObjectID &object_id, const NodeID &node_id)
LOCKS_EXCLUDED(mutex_);
/// Remove location from the location table of the given object.
///
/// \param[in] object_id The object to update.
/// \param[in] node_id The node to be removed from the location table.
void RemoveObjectLocation(const ObjectID &object_id, const ClientID &node_id)
void RemoveObjectLocation(const ObjectID &object_id, const NodeID &node_id)
LOCKS_EXCLUDED(mutex_);
/// Get the locations from the location table of the given object.
///
/// \param[in] object_id The object to get locations for.
/// \return The nodes that have the object.
std::unordered_set<ClientID> GetObjectLocations(const ObjectID &object_id)
std::unordered_set<NodeID> GetObjectLocations(const ObjectID &object_id)
LOCKS_EXCLUDED(mutex_);
private:
@@ -387,7 +385,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
/// Constructor for a reference that we created.
Reference(const rpc::Address &owner_address, std::string call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<ClientID> &pinned_at_raylet_id)
const absl::optional<NodeID> &pinned_at_raylet_id)
: call_site(call_site),
object_size(object_size),
owned_by_us(true),
@@ -461,7 +459,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
// If this object is owned by us and stored in plasma, and reference
// counting is enabled, then some raylet must be pinning the object value.
// This is the address of that raylet.
absl::optional<ClientID> pinned_at_raylet_id;
absl::optional<NodeID> pinned_at_raylet_id;
// Whether this object can be reconstructed via lineage. If false, then the
// object's value will be pinned as long as it is referenced by any other
// object's lineage.
@@ -680,7 +678,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
/// Holds all reference counts and dependency information for tracked ObjectIDs.
ReferenceTable object_id_refs_ GUARDED_BY(mutex_);
using LocationTable = absl::flat_hash_map<ObjectID, absl::flat_hash_set<ClientID>>;
using LocationTable = absl::flat_hash_map<ObjectID, absl::flat_hash_set<NodeID>>;
/// Holds the client information for the owned objects. This table is separate from
/// the reference table because we add object reference after putting object into the
+5 -5
View File
@@ -56,7 +56,7 @@ class MockWorkerClient : public rpc::CoreWorkerClientInterface {
rpc::Address CreateRandomAddress(const std::string &addr) {
rpc::Address address;
address.set_ip_address(addr);
address.set_raylet_id(ClientID::FromRandom().Binary());
address.set_raylet_id(NodeID::FromRandom().Binary());
address.set_worker_id(WorkerID::FromRandom().Binary());
return address;
}
@@ -1986,11 +1986,11 @@ TEST_F(ReferenceCountLineageEnabledTest, TestPlasmaLocation) {
ObjectID borrowed_id = ObjectID::FromRandom();
rc->AddLocalReference(borrowed_id, "");
ClientID pinned_at;
NodeID pinned_at;
ASSERT_FALSE(rc->IsPlasmaObjectPinned(borrowed_id, &pinned_at));
ObjectID id = ObjectID::FromRandom();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true);
rc->AddLocalReference(id, "");
ASSERT_TRUE(rc->SetDeleteCallback(id, callback));
@@ -2023,7 +2023,7 @@ TEST_F(ReferenceCountTest, TestFree) {
auto callback = [&](const ObjectID &object_id) { deleted->insert(object_id); };
ObjectID id = ObjectID::FromRandom();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
// Test free before receiving information about where the object is pinned.
rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true);
@@ -2034,7 +2034,7 @@ TEST_F(ReferenceCountTest, TestFree) {
ASSERT_FALSE(rc->SetDeleteCallback(id, callback));
ASSERT_EQ(deleted->count(id), 0);
rc->UpdateObjectPinnedAtRaylet(id, node_id);
ClientID pinned_at;
NodeID pinned_at;
ASSERT_TRUE(rc->IsPlasmaObjectPinned(id, &pinned_at));
ASSERT_TRUE(pinned_at.IsNil());
ASSERT_TRUE(rc->IsPlasmaObjectFreed(id));
+1 -1
View File
@@ -183,7 +183,7 @@ void TaskManager::CompletePendingTask(const TaskID &task_id,
reference_counter_->UpdateObjectSize(object_id, return_object.size());
if (return_object.in_plasma()) {
const auto pinned_at_raylet_id = ClientID::FromBinary(worker_addr.raylet_id());
const auto pinned_at_raylet_id = NodeID::FromBinary(worker_addr.raylet_id());
if (check_node_alive_(pinned_at_raylet_id)) {
reference_counter_->UpdateObjectPinnedAtRaylet(object_id, pinned_at_raylet_id);
// Mark it as in plasma with a dummy object.
+2 -2
View File
@@ -58,7 +58,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
TaskManager(std::shared_ptr<CoreWorkerMemoryStore> in_memory_store,
std::shared_ptr<ReferenceCounter> reference_counter,
RetryTaskCallback retry_task_callback,
const std::function<bool(const ClientID &node_id)> &check_node_alive,
const std::function<bool(const NodeID &node_id)> &check_node_alive,
ReconstructObjectCallback reconstruct_object_callback)
: in_memory_store_(in_memory_store),
reference_counter_(reference_counter),
@@ -238,7 +238,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
/// processing a worker's reply to check whether the node that the worker
/// was on is still alive. If the node is down, the plasma objects returned by the task
/// are marked as failed.
const std::function<bool(const ClientID &node_id)> check_node_alive_;
const std::function<bool(const NodeID &node_id)> check_node_alive_;
/// Called when processing a worker's reply if the node that the worker was
/// on died. This should be called to attempt to recover a plasma object
/// returned by the task (or store an error if the object is not
@@ -101,7 +101,7 @@ class MockReferenceCounter : public ReferenceCounterInterface {
void(const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
const rpc::Address &owner_address, const std::string &call_site,
const int64_t object_size, bool is_reconstructable,
const absl::optional<ClientID> &pinned_at_raylet_id));
const absl::optional<NodeID> &pinned_at_raylet_id));
MOCK_METHOD2(SetDeleteCallback,
bool(const ObjectID &object_id,
@@ -120,7 +120,7 @@ class MockRayletClient : public WorkerLeaseInterface {
// Trigger reply to RequestWorkerLease.
bool GrantWorkerLease(const std::string &address, int port,
const ClientID &retry_at_raylet_id, bool cancel = false) {
const NodeID &retry_at_raylet_id, bool cancel = false) {
rpc::RequestWorkerLeaseReply reply;
if (cancel) {
reply.set_canceled(true);
@@ -341,7 +341,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
@@ -354,7 +354,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) {
ASSERT_EQ(raylet_client->num_workers_returned, 0);
ASSERT_EQ(worker_client->callbacks.size(), 0);
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
ASSERT_EQ(task_finisher->num_tasks_complete, 0);
ASSERT_EQ(task_finisher->num_tasks_failed, 0);
@@ -382,7 +382,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -390,7 +390,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) {
TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor);
ASSERT_TRUE(submitter.SubmitTask(task).ok());
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
// Simulate a system failure, i.e., worker died unexpectedly.
ASSERT_TRUE(worker_client->ReplyPushTask(Status::IOError("oops")));
ASSERT_EQ(worker_client->callbacks.size(), 0);
@@ -416,7 +416,7 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -431,17 +431,17 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed; worker 2 is requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// Task 2 is pushed; worker 3 is requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 2);
ASSERT_EQ(raylet_client->num_workers_requested, 3);
// Task 3 is pushed; no more workers requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 3);
ASSERT_EQ(raylet_client->num_workers_requested, 3);
@@ -471,7 +471,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -486,7 +486,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
ASSERT_EQ(raylet_client->num_leases_canceled, 0);
@@ -508,7 +508,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
ASSERT_EQ(raylet_client->num_workers_returned, 1);
// The second lease request is returned immediately.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 0);
ASSERT_EQ(raylet_client->num_workers_returned, 2);
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
@@ -532,7 +532,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -547,7 +547,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
// Task 1 finishes, Task 2 is scheduled on the same worker.
ASSERT_TRUE(worker_client->ReplyPushTask());
// Task 2 finishes, Task 3 is scheduled on the same worker.
@@ -569,7 +569,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
ASSERT_EQ(raylet_client->num_leases_canceled, i);
ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease());
ASSERT_EQ(raylet_client->num_leases_canceled, i);
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true));
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true));
ASSERT_EQ(worker_client->callbacks.size(), 0);
// The canceled lease is not returned.
ASSERT_EQ(raylet_client->num_workers_returned, 1);
@@ -592,7 +592,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -605,7 +605,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
ASSERT_TRUE(submitter.SubmitTask(task2).ok());
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// Task 1 finishes, Task 2 is scheduled on the same worker.
ASSERT_TRUE(worker_client->ReplyPushTask());
@@ -624,11 +624,11 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
// Task 2's lease request is canceled, a new worker is requested for task 3.
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
ASSERT_EQ(raylet_client->num_workers_requested, 2);
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true));
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true));
ASSERT_EQ(raylet_client->num_workers_requested, 3);
// Task 3 finishes, all workers returned.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_TRUE(worker_client->ReplyPushTask());
ASSERT_EQ(raylet_client->num_workers_returned, 2);
ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease());
@@ -649,7 +649,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -662,7 +662,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
@@ -673,7 +673,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
ASSERT_EQ(raylet_client->num_workers_disconnected, 1);
// Task 2 runs successfully on the second worker.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_TRUE(worker_client->ReplyPushTask());
ASSERT_EQ(raylet_client->num_workers_returned, 1);
ASSERT_EQ(raylet_client->num_workers_disconnected, 1);
@@ -697,7 +697,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -708,7 +708,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
// Task 1 finishes with exit status; the worker is not returned.
@@ -745,7 +745,7 @@ TEST(DirectTaskTransportTest, TestSpillback) {
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool,
lease_client_factory, store, task_finisher,
ClientID::Nil(), kLongTimeout, actor_creator);
NodeID::Nil(), kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
ray::FunctionDescriptorBuilder::BuildPython("", "", "", "");
@@ -758,14 +758,14 @@ TEST(DirectTaskTransportTest, TestSpillback) {
ASSERT_EQ(remote_lease_clients.size(), 0);
// Spillback to a remote node.
auto remote_raylet_id = ClientID::FromRandom();
auto remote_raylet_id = NodeID::FromRandom();
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id));
ASSERT_EQ(remote_lease_clients.count(7777), 1);
// There should be no more callbacks on the local client.
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil()));
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil()));
// Trigger retry at the remote node.
ASSERT_TRUE(
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil()));
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil()));
// The worker is returned to the remote node, not the local one.
ASSERT_TRUE(worker_client->ReplyPushTask());
@@ -804,7 +804,7 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
return client;
};
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto local_raylet_id = ClientID::FromRandom();
auto local_raylet_id = NodeID::FromRandom();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool,
lease_client_factory, store, task_finisher,
@@ -821,10 +821,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
ASSERT_EQ(remote_lease_clients.size(), 0);
// Spillback to a remote node.
auto remote_raylet_id = ClientID::FromRandom();
auto remote_raylet_id = NodeID::FromRandom();
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id));
ASSERT_EQ(remote_lease_clients.count(7777), 1);
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil()));
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil()));
// Trigger a spillback back to the local node.
ASSERT_TRUE(
remote_lease_clients[7777]->GrantWorkerLease("local", 1234, local_raylet_id));
@@ -832,10 +832,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
ASSERT_EQ(remote_lease_clients.size(), 1);
// There should be no more callbacks on the remote node.
ASSERT_FALSE(
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil()));
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil()));
// The worker is returned to the local node.
ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, NodeID::Nil()));
ASSERT_TRUE(worker_client->ReplyPushTask());
ASSERT_EQ(raylet_client->num_workers_returned, 1);
ASSERT_EQ(remote_lease_clients[7777]->num_workers_returned, 0);
@@ -868,7 +868,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
ASSERT_TRUE(submitter.SubmitTask(same1).ok());
@@ -877,7 +877,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// same1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 1);
// Another worker is requested because same2 is pending.
ASSERT_EQ(raylet_client->num_workers_requested, 3);
@@ -893,7 +893,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
// different is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 2);
ASSERT_EQ(raylet_client->num_workers_requested, 3);
@@ -910,7 +910,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
ASSERT_EQ(raylet_client->num_leases_canceled, 1);
// Trigger reply to RequestWorkerLease to remove the canceled pending lease request
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil(), true));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil(), true));
ASSERT_EQ(raylet_client->num_workers_returned, 2);
// Check that there are no entries left in the scheduling_key_entries_ hashmap. These
@@ -994,7 +994,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
/*lease_timeout_ms=*/5, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -1009,7 +1009,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1 is pushed.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// Task 1 finishes with failure; the worker is returned due to the error even though
@@ -1020,7 +1020,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
// Task 2 runs successfully on the second worker; the worker is returned due to the
// timeout.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
std::this_thread::sleep_for(
std::chrono::milliseconds(10)); // Sleep for 10ms, causing the lease to time out.
ASSERT_TRUE(worker_client->ReplyPushTask());
@@ -1029,7 +1029,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
// Task 3 runs successfully on the third worker; the worker is returned even though it
// hasn't timed out.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil()));
ASSERT_TRUE(worker_client->ReplyPushTask());
ASSERT_EQ(worker_client->callbacks.size(), 0);
ASSERT_EQ(raylet_client->num_workers_returned, 2);
@@ -1053,7 +1053,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -1061,7 +1061,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor);
ASSERT_TRUE(submitter.SubmitTask(task).ok());
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
// Try force kill, exiting the worker
ASSERT_TRUE(submitter.CancelTask(task, true).ok());
@@ -1077,7 +1077,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
task.GetMutableMessage().set_task_id(
TaskID::ForNormalTask(JobID::Nil(), TaskID::Nil(), 1).Binary());
ASSERT_TRUE(submitter.SubmitTask(task).ok());
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
// Try non-force kill, worker returns normally
ASSERT_TRUE(submitter.CancelTask(task, false).ok());
@@ -1105,7 +1105,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -1124,7 +1124,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) {
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
// Trigger reply to RequestWorkerLease to remove the canceled pending lease request
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil(), true));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil(), true));
// Check that there are no entries left in the scheduling_key_entries_ hashmap. These
// would otherwise cause a memory leak.
@@ -1141,7 +1141,7 @@ TEST(DirectTaskTransportTest, TestKillResolvingTask) {
auto task_finisher = std::make_shared<MockTaskFinisher>();
auto actor_creator = std::make_shared<MockActorCreator>();
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
store, task_finisher, ClientID::Nil(),
store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator);
std::unordered_map<std::string, double> empty_resources;
ray::FunctionDescriptor empty_descriptor =
@@ -1181,7 +1181,7 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) {
// parameter to the CoreWorkerDirectTaskSubmitter.
uint32_t max_tasks_in_flight_per_worker = 10;
CoreWorkerDirectTaskSubmitter submitter(
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
// Prepare 20 tasks and save them in a vector.
@@ -1201,12 +1201,12 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// First 10 tasks are pushed; worker 2 is requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 10);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// Last 10 tasks are pushed; no more workers are requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 20);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
@@ -1255,7 +1255,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
// parameter to the CoreWorkerDirectTaskSubmitter.
uint32_t max_tasks_in_flight_per_worker = 10;
CoreWorkerDirectTaskSubmitter submitter(
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
// prepare 30 tasks and save them in a vector
@@ -1275,7 +1275,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
ASSERT_EQ(raylet_client->num_workers_requested, 1);
// Task 1-10 are pushed, and a new worker is requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 10);
ASSERT_EQ(raylet_client->num_workers_requested, 2);
// The lease is not cancelled, as there is more work to do
@@ -1305,7 +1305,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
ASSERT_EQ(raylet_client->num_workers_returned, 1);
// The second lease request is returned immediately.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(worker_client->callbacks.size(), 0);
ASSERT_EQ(raylet_client->num_workers_returned, 2);
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
@@ -1334,7 +1334,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
// parameter to the CoreWorkerDirectTaskSubmitter.
uint32_t max_tasks_in_flight_per_worker = 10;
CoreWorkerDirectTaskSubmitter submitter(
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
// prepare 30 tasks and save them in a vector
@@ -1361,7 +1361,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
ASSERT_EQ(worker_client->callbacks.size(), 0);
// Grant a worker lease, and check that still only 1 worker was requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
ASSERT_EQ(raylet_client->num_workers_requested, 1);
ASSERT_EQ(raylet_client->num_workers_returned, 0);
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
@@ -1399,7 +1399,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
ASSERT_EQ(worker_client->callbacks.size(), 10);
// Grant a worker lease, and check that still only 2 workers were requested.
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
ASSERT_EQ(raylet_client->num_workers_requested, 2);
ASSERT_EQ(raylet_client->num_workers_returned, 0);
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
@@ -104,7 +104,7 @@ class MockObjectDirectory {
class ObjectRecoveryManagerTest : public ::testing::Test {
public:
ObjectRecoveryManagerTest()
: local_raylet_id_(ClientID::FromRandom()),
: local_raylet_id_(NodeID::FromRandom()),
object_directory_(std::make_shared<MockObjectDirectory>()),
memory_store_(std::make_shared<CoreWorkerMemoryStore>()),
raylet_client_(std::make_shared<MockRayletClient>()),
@@ -137,7 +137,7 @@ class ObjectRecoveryManagerTest : public ::testing::Test {
},
/*lineage_reconstruction_enabled=*/true) {}
ClientID local_raylet_id_;
NodeID local_raylet_id_;
std::unordered_map<ObjectID, bool> failed_reconstructions_;
std::shared_ptr<MockObjectDirectory> object_directory_;
@@ -46,7 +46,7 @@ class TaskManagerTest : public ::testing::Test {
num_retries_++;
return Status::OK();
},
[this](const ClientID &node_id) { return all_nodes_alive_; },
[this](const NodeID &node_id) { return all_nodes_alive_; },
[this](const ObjectID &object_id) {
objects_to_recover_.push_back(object_id);
}) {}
@@ -233,9 +233,9 @@ CoreWorkerDirectTaskSubmitter::GetOrConnectLeaseClient(
const rpc::Address *raylet_address) {
std::shared_ptr<WorkerLeaseInterface> lease_client;
if (raylet_address &&
ClientID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) {
NodeID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) {
// A remote raylet was specified. Connect to the raylet if needed.
ClientID raylet_id = ClientID::FromBinary(raylet_address->raylet_id());
NodeID raylet_id = NodeID::FromBinary(raylet_address->raylet_id());
auto it = remote_lease_clients_.find(raylet_id);
if (it == remote_lease_clients_.end()) {
RAY_LOG(DEBUG) << "Connecting to raylet " << raylet_id;
@@ -55,7 +55,7 @@ class CoreWorkerDirectTaskSubmitter {
std::shared_ptr<rpc::CoreWorkerClientPool> core_worker_client_pool,
LeaseClientFactoryFn lease_client_factory,
std::shared_ptr<CoreWorkerMemoryStore> store,
std::shared_ptr<TaskFinisherInterface> task_finisher, ClientID local_raylet_id,
std::shared_ptr<TaskFinisherInterface> task_finisher, NodeID local_raylet_id,
int64_t lease_timeout_ms, std::shared_ptr<ActorCreatorInterface> actor_creator,
uint32_t max_tasks_in_flight_per_worker =
RayConfig::instance().max_tasks_in_flight_per_worker(),
@@ -154,8 +154,8 @@ class CoreWorkerDirectTaskSubmitter {
std::shared_ptr<WorkerLeaseInterface> local_lease_client_;
/// Cache of gRPC clients to remote raylets.
absl::flat_hash_map<ClientID, std::shared_ptr<WorkerLeaseInterface>>
remote_lease_clients_ GUARDED_BY(mu_);
absl::flat_hash_map<NodeID, std::shared_ptr<WorkerLeaseInterface>> remote_lease_clients_
GUARDED_BY(mu_);
/// Factory for producing new clients to request leases from remote nodes.
LeaseClientFactoryFn lease_client_factory_;
@@ -172,7 +172,7 @@ class CoreWorkerDirectTaskSubmitter {
/// The local raylet ID. Used to make sure that we use the local lease client
/// if a remote raylet tells us to spill the task back to the local raylet.
const ClientID local_raylet_id_;
const NodeID local_raylet_id_;
/// Interface for actor creation.
std::shared_ptr<ActorCreatorInterface> actor_creator_;
+13 -14
View File
@@ -370,7 +370,7 @@ class ObjectInfoAccessor {
/// \param node_id The location that will be added to GCS.
/// \param callback Callback that will be called after object has been added to GCS.
/// \return Status
virtual Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
virtual Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) = 0;
/// Remove location of object from GCS asynchronously.
@@ -379,7 +379,7 @@ class ObjectInfoAccessor {
/// \param node_id The location that will be removed from GCS.
/// \param callback Callback that will be called after the delete finished.
/// \return Status
virtual Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
virtual Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) = 0;
/// Subscribe to any update of an object's location.
@@ -434,8 +434,8 @@ class NodeInfoAccessor {
/// Get id of local node which was registered by 'RegisterSelf'.
///
/// \return ClientID
virtual const ClientID &GetSelfId() const = 0;
/// \return NodeID
virtual const NodeID &GetSelfId() const = 0;
/// Get information of local node which was registered by 'RegisterSelf'.
///
@@ -455,7 +455,7 @@ class NodeInfoAccessor {
/// \param node_id The ID of the node to be unregistered.
/// \param callback Callback that will be called when unregistration is complete.
/// \return Status
virtual Status AsyncUnregister(const ClientID &node_id,
virtual Status AsyncUnregister(const NodeID &node_id,
const StatusCallback &callback) = 0;
/// Get information of all nodes from GCS asynchronously.
@@ -472,7 +472,7 @@ class NodeInfoAccessor {
/// \param done Callback that will be called when subscription is complete.
/// \return Status
virtual Status AsyncSubscribeToNodeChange(
const SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe,
const SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
const StatusCallback &done) = 0;
/// Get node information from local cache.
@@ -483,7 +483,7 @@ class NodeInfoAccessor {
/// \param node_id The ID of node to look up in local cache.
/// \return The item returned by GCS. If the item to read doesn't exist,
/// this optional object is empty.
virtual boost::optional<rpc::GcsNodeInfo> Get(const ClientID &node_id) const = 0;
virtual boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id) const = 0;
/// Get information of all nodes from local cache.
/// Non-thread safe.
@@ -491,7 +491,7 @@ class NodeInfoAccessor {
/// is called before.
///
/// \return All nodes in cache.
virtual const std::unordered_map<ClientID, rpc::GcsNodeInfo> &GetAll() const = 0;
virtual const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const = 0;
/// Search the local cache to find out if the given node is removed.
/// Non-thread safe.
@@ -500,7 +500,7 @@ class NodeInfoAccessor {
///
/// \param node_id The id of the node to check.
/// \return Whether the node is removed.
virtual bool IsRemoved(const ClientID &node_id) const = 0;
virtual bool IsRemoved(const NodeID &node_id) const = 0;
// TODO(micafan) Define ResourceMap in GCS proto.
typedef std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>>
@@ -511,7 +511,7 @@ class NodeInfoAccessor {
/// \param node_id The ID of the node whose dynamic resources are looked up.
/// \param callback Callback that will be called after lookup finishes.
/// \return Status
virtual Status AsyncGetResources(const ClientID &node_id,
virtual Status AsyncGetResources(const NodeID &node_id,
const OptionalItemCallback<ResourceMap> &callback) = 0;
/// Update resources of node in GCS asynchronously.
@@ -519,8 +519,7 @@ class NodeInfoAccessor {
/// \param node_id The ID of node to update dynamic resources.
/// \param resources The dynamic resources of node to be updated.
/// \param callback Callback that will be called after update finishes.
virtual Status AsyncUpdateResources(const ClientID &node_id,
const ResourceMap &resources,
virtual Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
const StatusCallback &callback) = 0;
/// Delete resources of a node from GCS asynchronously.
@@ -528,7 +527,7 @@ class NodeInfoAccessor {
/// \param node_id The ID of node to delete resources from GCS.
/// \param resource_names The names of the resources to be deleted.
/// \param callback Callback that will be called after delete finishes.
virtual Status AsyncDeleteResources(const ClientID &node_id,
virtual Status AsyncDeleteResources(const NodeID &node_id,
const std::vector<std::string> &resource_names,
const StatusCallback &callback) = 0;
@@ -560,7 +559,7 @@ class NodeInfoAccessor {
/// \param done Callback that will be called when subscription is complete.
/// \return Status
virtual Status AsyncSubscribeHeartbeat(
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
const StatusCallback &done) = 0;
/// Report state of all nodes to GCS asynchronously.
@@ -127,7 +127,7 @@ std::unique_ptr<std::string> GlobalStateAccessor::GetObjectInfo(
return object_info;
}
std::string GlobalStateAccessor::GetNodeResourceInfo(const ClientID &node_id) {
std::string GlobalStateAccessor::GetNodeResourceInfo(const NodeID &node_id) {
rpc::ResourceMap node_resource_map;
std::promise<void> promise;
auto on_done =
@@ -84,7 +84,7 @@ class GlobalStateAccessor {
/// \return node resource map info. To support multi-language, we serialize each
/// ResourceTableData and return the serialized string. Where used, it needs to be
/// deserialized with protobuf function.
std::string GetNodeResourceInfo(const ClientID &node_id);
std::string GetNodeResourceInfo(const NodeID &node_id);
/// Get internal config from GCS Service.
///
@@ -457,7 +457,7 @@ ServiceBasedNodeInfoAccessor::ServiceBasedNodeInfoAccessor(
: client_impl_(client_impl) {}
Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info) {
auto node_id = ClientID::FromBinary(local_node_info.node_id());
auto node_id = NodeID::FromBinary(local_node_info.node_id());
RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id
<< ", address is = " << local_node_info.node_manager_address();
RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected.";
@@ -472,7 +472,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_
const Status &status, const rpc::RegisterNodeReply &reply) {
if (status.ok()) {
local_node_info_.CopyFrom(local_node_info);
local_node_id_ = ClientID::FromBinary(local_node_info.node_id());
local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
}
RAY_LOG(DEBUG) << "Finished registering node info, status = " << status
<< ", node id = " << node_id;
@@ -486,7 +486,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_
Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
RAY_CHECK(!local_node_id_.IsNil()) << "This node is disconnected.";
ClientID node_id = ClientID::FromBinary(local_node_info_.node_id());
NodeID node_id = NodeID::FromBinary(local_node_info_.node_id());
RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id;
rpc::UnregisterNodeRequest request;
request.set_node_id(local_node_info_.node_id());
@@ -495,7 +495,7 @@ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
[this, node_id](const Status &status, const rpc::UnregisterNodeReply &reply) {
if (status.ok()) {
local_node_info_.set_state(GcsNodeInfo::DEAD);
local_node_id_ = ClientID::Nil();
local_node_id_ = NodeID::Nil();
}
RAY_LOG(INFO) << "Finished unregistering node info, status = " << status
<< ", node id = " << node_id;
@@ -503,7 +503,7 @@ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
return Status::OK();
}
const ClientID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; }
const NodeID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; }
const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const {
return local_node_info_;
@@ -511,7 +511,7 @@ const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const {
Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_info,
const StatusCallback &callback) {
ClientID node_id = ClientID::FromBinary(node_info.node_id());
NodeID node_id = NodeID::FromBinary(node_info.node_id());
RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id;
rpc::RegisterNodeRequest request;
request.mutable_node_info()->CopyFrom(node_info);
@@ -527,7 +527,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_
return Status::OK();
}
Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
const StatusCallback &callback) {
RAY_LOG(DEBUG) << "Unregistering node info, node id = " << node_id;
rpc::UnregisterNodeRequest request;
@@ -563,8 +563,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetAll(
}
Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const StatusCallback &done) {
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
RAY_CHECK(node_change_callback_ == nullptr);
node_change_callback_ = subscribe;
@@ -597,7 +596,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange(
}
boost::optional<GcsNodeInfo> ServiceBasedNodeInfoAccessor::Get(
const ClientID &node_id) const {
const NodeID &node_id) const {
RAY_CHECK(!node_id.IsNil());
auto entry = node_cache_.find(node_id);
if (entry != node_cache_.end()) {
@@ -606,17 +605,17 @@ boost::optional<GcsNodeInfo> ServiceBasedNodeInfoAccessor::Get(
return boost::none;
}
const std::unordered_map<ClientID, GcsNodeInfo> &ServiceBasedNodeInfoAccessor::GetAll()
const std::unordered_map<NodeID, GcsNodeInfo> &ServiceBasedNodeInfoAccessor::GetAll()
const {
return node_cache_;
}
bool ServiceBasedNodeInfoAccessor::IsRemoved(const ClientID &node_id) const {
bool ServiceBasedNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
return removed_nodes_.count(node_id) == 1;
}
Status ServiceBasedNodeInfoAccessor::AsyncGetResources(
const ClientID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
RAY_LOG(DEBUG) << "Getting node resources, node id = " << node_id;
rpc::GetResourcesRequest request;
request.set_node_id(node_id.Binary());
@@ -636,8 +635,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetResources(
}
Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources(
const ClientID &node_id, const ResourceMap &resources,
const StatusCallback &callback) {
const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) {
RAY_LOG(DEBUG) << "Updating node resources, node id = " << node_id;
rpc::UpdateResourcesRequest request;
request.set_node_id(node_id.Binary());
@@ -664,7 +662,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources(
}
Status ServiceBasedNodeInfoAccessor::AsyncDeleteResources(
const ClientID &node_id, const std::vector<std::string> &resource_names,
const NodeID &node_id, const std::vector<std::string> &resource_names,
const StatusCallback &callback) {
RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id;
rpc::DeleteResourcesRequest request;
@@ -732,7 +730,7 @@ void ServiceBasedNodeInfoAccessor::AsyncReReportHeartbeat() {
}
Status ServiceBasedNodeInfoAccessor::AsyncSubscribeHeartbeat(
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
const StatusCallback &done) {
const std::string error_msg =
"Unsupported method of AsyncSubscribeHeartbeat in ServiceBasedNodeInfoAccessor.";
@@ -766,7 +764,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeBatchHeartbeat(
}
void ServiceBasedNodeInfoAccessor::HandleNotification(const GcsNodeInfo &node_info) {
ClientID node_id = ClientID::FromBinary(node_info.node_id());
NodeID node_id = NodeID::FromBinary(node_info.node_id());
bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
auto entry = node_cache_.find(node_id);
bool is_notif_new;
@@ -974,7 +972,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) {
Status ServiceBasedTaskInfoAccessor::AsyncAddTaskLease(
const std::shared_ptr<rpc::TaskLeaseData> &data_ptr, const StatusCallback &callback) {
TaskID task_id = TaskID::FromBinary(data_ptr->task_id());
ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id());
NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id());
RAY_LOG(DEBUG) << "Adding task lease, task id = " << task_id
<< ", node id = " << node_id;
rpc::AddTaskLeaseRequest request;
@@ -1061,7 +1059,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &tas
Status ServiceBasedTaskInfoAccessor::AttemptTaskReconstruction(
const std::shared_ptr<rpc::TaskReconstructionData> &data_ptr,
const StatusCallback &callback) {
ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id());
NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id());
RAY_LOG(DEBUG) << "Reconstructing task, reconstructions num = "
<< data_ptr->num_reconstructions() << ", node id = " << node_id;
rpc::AttemptTaskReconstructionRequest request;
@@ -1151,7 +1149,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncGetAll(
}
Status ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
const ClientID &node_id,
const NodeID &node_id,
const StatusCallback &callback) {
RAY_LOG(DEBUG) << "Adding object location, object id = " << object_id
<< ", node id = " << node_id;
@@ -1179,7 +1177,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_i
}
Status ServiceBasedObjectInfoAccessor::AsyncRemoveLocation(
const ObjectID &object_id, const ClientID &node_id, const StatusCallback &callback) {
const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) {
RAY_LOG(DEBUG) << "Removing object location, object id = " << object_id
<< ", node id = " << node_id;
rpc::RemoveObjectLocationRequest request;
@@ -1299,7 +1297,7 @@ ServiceBasedStatsInfoAccessor::ServiceBasedStatsInfoAccessor(
Status ServiceBasedStatsInfoAccessor::AsyncAddProfileData(
const std::shared_ptr<rpc::ProfileTableData> &data_ptr,
const StatusCallback &callback) {
ClientID node_id = ClientID::FromBinary(data_ptr->component_id());
NodeID node_id = NodeID::FromBinary(data_ptr->component_id());
RAY_LOG(DEBUG) << "Adding profile data, component type = " << data_ptr->component_type()
<< ", node id = " << node_id;
rpc::AddProfileDataRequest request;
+17 -18
View File
@@ -156,35 +156,34 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
Status UnregisterSelf() override;
const ClientID &GetSelfId() const override;
const NodeID &GetSelfId() const override;
const GcsNodeInfo &GetSelfInfo() const override;
Status AsyncRegister(const rpc::GcsNodeInfo &node_info,
const StatusCallback &callback) override;
Status AsyncUnregister(const ClientID &node_id,
const StatusCallback &callback) override;
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
Status AsyncSubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
const StatusCallback &done) override;
boost::optional<GcsNodeInfo> Get(const ClientID &node_id) const override;
boost::optional<GcsNodeInfo> Get(const NodeID &node_id) const override;
const std::unordered_map<ClientID, GcsNodeInfo> &GetAll() const override;
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
bool IsRemoved(const ClientID &node_id) const override;
bool IsRemoved(const NodeID &node_id) const override;
Status AsyncGetResources(const ClientID &node_id,
Status AsyncGetResources(const NodeID &node_id,
const OptionalItemCallback<ResourceMap> &callback) override;
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
const StatusCallback &callback) override;
Status AsyncDeleteResources(const ClientID &node_id,
Status AsyncDeleteResources(const NodeID &node_id,
const std::vector<std::string> &resource_names,
const StatusCallback &callback) override;
@@ -197,7 +196,7 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
void AsyncReReportHeartbeat() override;
Status AsyncSubscribeHeartbeat(
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
const StatusCallback &done) override;
Status AsyncReportBatchHeartbeat(
@@ -240,20 +239,20 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
ServiceBasedGcsClient *client_impl_;
using NodeChangeCallback =
std::function<void(const ClientID &id, const GcsNodeInfo &node_info)>;
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
GcsNodeInfo local_node_info_;
ClientID local_node_id_;
NodeID local_node_id_;
Sequencer<ClientID> sequencer_;
Sequencer<NodeID> sequencer_;
/// The callback to call when a new node is added or a node is removed.
NodeChangeCallback node_change_callback_{nullptr};
/// A cache for information about all nodes.
std::unordered_map<ClientID, GcsNodeInfo> node_cache_;
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
/// The set of removed nodes.
std::unordered_set<ClientID> removed_nodes_;
std::unordered_set<NodeID> removed_nodes_;
};
/// \class ServiceBasedTaskInfoAccessor
@@ -329,10 +328,10 @@ class ServiceBasedObjectInfoAccessor : public ObjectInfoAccessor {
Status AsyncGetAll(const MultiItemCallback<rpc::ObjectLocationInfo> &callback) override;
Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) override;
Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) override;
Status AsyncSubscribeToLocations(
@@ -139,7 +139,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) {
for (int index = 0; index < node_count; ++index) {
auto node_table_data =
Mocker::GenNodeInfo(index, std::string("127.0.0.") + std::to_string(index));
auto node_id = ClientID::FromBinary(node_table_data->node_id());
auto node_id = NodeID::FromBinary(node_table_data->node_id());
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Nodes().AsyncRegister(
*node_table_data, [&promise](Status status) { promise.set_value(status.ok()); }));
@@ -158,7 +158,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) {
rpc::GcsNodeInfo node_data;
node_data.ParseFromString(node_table[index]);
auto resource_map_str =
global_state_->GetNodeResourceInfo(ClientID::FromBinary(node_data.node_id()));
global_state_->GetNodeResourceInfo(NodeID::FromBinary(node_data.node_id()));
rpc::ResourceMap resource_map;
resource_map.ParseFromString(resource_map_str);
ASSERT_EQ(
@@ -195,7 +195,7 @@ TEST_F(GlobalStateAccessorTest, TestProfileTable) {
int profile_count = RayConfig::instance().maximum_profile_table_rows_count() + 1;
ASSERT_EQ(global_state_->GetAllProfileInfo().size(), 0);
for (int index = 0; index < profile_count; ++index) {
auto client_id = ClientID::FromRandom();
auto client_id = NodeID::FromRandom();
auto profile_table_data = Mocker::GenProfileTableData(client_id);
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Stats().AsyncAddProfileData(
@@ -215,7 +215,7 @@ TEST_F(GlobalStateAccessorTest, TestObjectTable) {
for (int index = 0; index < object_count; ++index) {
ObjectID object_id = ObjectID::FromRandom();
object_ids.emplace_back(object_id);
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation(
object_id, node_id,
@@ -221,7 +221,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
}
bool SubscribeToNodeChange(
const gcs::SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe) {
const gcs::SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe) {
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Nodes().AsyncSubscribeToNodeChange(
subscribe, [&promise](Status status) { promise.set_value(status.ok()); }));
@@ -258,7 +258,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
return nodes;
}
bool UnregisterNode(const ClientID &node_id) {
bool UnregisterNode(const NodeID &node_id) {
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Nodes().AsyncUnregister(
node_id, [&promise](Status status) { promise.set_value(status.ok()); }));
@@ -272,7 +272,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
return WaitReady(promise.get_future(), timeout_ms_);
}
gcs::NodeInfoAccessor::ResourceMap GetResources(const ClientID &node_id) {
gcs::NodeInfoAccessor::ResourceMap GetResources(const NodeID &node_id) {
gcs::NodeInfoAccessor::ResourceMap resource_map;
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Nodes().AsyncGetResources(
@@ -288,7 +288,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
return resource_map;
}
bool UpdateResources(const ClientID &node_id, const std::string &key) {
bool UpdateResources(const NodeID &node_id, const std::string &key) {
std::promise<bool> promise;
gcs::NodeInfoAccessor::ResourceMap resource_map;
auto resource = std::make_shared<rpc::ResourceTableData>();
@@ -300,7 +300,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
return WaitReady(promise.get_future(), timeout_ms_);
}
bool DeleteResources(const ClientID &node_id,
bool DeleteResources(const NodeID &node_id,
const std::vector<std::string> &resource_names) {
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Nodes().AsyncDeleteResources(
@@ -415,7 +415,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
RAY_CHECK_OK(gcs_client_->Objects().AsyncUnsubscribeToLocations(object_id));
}
bool AddLocation(const ObjectID &object_id, const ClientID &node_id) {
bool AddLocation(const ObjectID &object_id, const NodeID &node_id) {
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation(
object_id, node_id,
@@ -423,7 +423,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
return WaitReady(promise.get_future(), timeout_ms_);
}
bool RemoveLocation(const ObjectID &object_id, const ClientID &node_id) {
bool RemoveLocation(const ObjectID &object_id, const NodeID &node_id) {
std::promise<bool> promise;
RAY_CHECK_OK(gcs_client_->Objects().AsyncRemoveLocation(
object_id, node_id,
@@ -601,12 +601,12 @@ TEST_F(ServiceBasedGcsClientTest, TestActorSubscribeAll) {
TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) {
// Create gcs node info.
auto gcs_node1_info = Mocker::GenNodeInfo();
ClientID node1_id = ClientID::FromBinary(gcs_node1_info->node_id());
NodeID node1_id = NodeID::FromBinary(gcs_node1_info->node_id());
// Subscribe to node addition and removal events from GCS.
std::atomic<int> register_count(0);
std::atomic<int> unregister_count(0);
auto on_subscribe = [&register_count, &unregister_count](const ClientID &node_id,
auto on_subscribe = [&register_count, &unregister_count](const NodeID &node_id,
const rpc::GcsNodeInfo &data) {
if (data.state() == rpc::GcsNodeInfo::ALIVE) {
++register_count;
@@ -625,7 +625,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) {
// Register a node to GCS.
auto gcs_node2_info = Mocker::GenNodeInfo();
ClientID node2_id = ClientID::FromBinary(gcs_node2_info->node_id());
NodeID node2_id = NodeID::FromBinary(gcs_node2_info->node_id());
ASSERT_TRUE(RegisterNode(*gcs_node2_info));
WaitForExpectedCount(register_count, 2);
@@ -671,7 +671,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeResources) {
RAY_CHECK(RegisterNode(*node_info));
// Update resources of node in GCS.
ClientID node_id = ClientID::FromBinary(node_info->node_id());
NodeID node_id = NodeID::FromBinary(node_info->node_id());
std::string key = "CPU";
ASSERT_TRUE(UpdateResources(node_id, key));
WaitForExpectedCount(add_count, 1);
@@ -697,7 +697,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeHeartbeat) {
RAY_CHECK(RegisterNode(*node_info));
// Report heartbeat of a node to GCS.
ClientID node_id = ClientID::FromBinary(node_info->node_id());
NodeID node_id = NodeID::FromBinary(node_info->node_id());
auto heartbeat = std::make_shared<rpc::HeartbeatTableData>();
heartbeat->set_client_id(node_id.Binary());
// Set this flag because GCS won't publish unchanged heartbeat.
@@ -747,7 +747,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) {
ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe));
// Add a task lease to GCS.
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
ASSERT_TRUE(AddTaskLease(task_lease));
WaitForExpectedCount(task_lease_count, 2);
@@ -771,7 +771,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) {
TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) {
ObjectID object_id = ObjectID::FromRandom();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
// Subscribe to any update of an object's location.
std::atomic<int> object_add_count(0);
@@ -816,7 +816,7 @@ TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) {
TEST_F(ServiceBasedGcsClientTest, TestStats) {
// Add profile data to GCS.
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
auto profile_table_data = Mocker::GenProfileTableData(node_id);
ASSERT_TRUE(AddProfileData(profile_table_data));
}
@@ -938,7 +938,7 @@ TEST_F(ServiceBasedGcsClientTest, TestActorTableResubscribe) {
TEST_F(ServiceBasedGcsClientTest, TestObjectTableResubscribe) {
ObjectID object1_id = ObjectID::FromRandom();
ObjectID object2_id = ObjectID::FromRandom();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
// Subscribe to any update of an object's location.
std::atomic<int> object1_change_count(0);
@@ -985,7 +985,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
// Test that subscription of the node table can still work when GCS server restarts.
// Subscribe to node addition and removal events from GCS and cache that information.
std::atomic<int> node_change_count(0);
auto node_subscribe = [&node_change_count](const ClientID &id,
auto node_subscribe = [&node_change_count](const NodeID &id,
const rpc::GcsNodeInfo &result) {
++node_change_count;
};
@@ -1009,7 +1009,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
auto node_info = Mocker::GenNodeInfo(1);
ASSERT_TRUE(RegisterNode(*node_info));
ClientID node_id = ClientID::FromBinary(node_info->node_id());
NodeID node_id = NodeID::FromBinary(node_info->node_id());
std::string key = "CPU";
ASSERT_TRUE(UpdateResources(node_id, key));
auto heartbeat = std::make_shared<rpc::HeartbeatTableData>();
@@ -1023,7 +1023,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
node_info = Mocker::GenNodeInfo(1);
ASSERT_TRUE(RegisterNode(*node_info));
node_id = ClientID::FromBinary(node_info->node_id());
node_id = NodeID::FromBinary(node_info->node_id());
ASSERT_TRUE(UpdateResources(node_id, key));
heartbeat->set_client_id(node_info->node_id());
ASSERT_TRUE(ReportHeartbeat(heartbeat));
@@ -1056,7 +1056,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) {
ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe));
ASSERT_TRUE(AddTask(task_table_data));
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
ASSERT_TRUE(AddTaskLease(task_lease));
WaitForExpectedCount(task_count, 1);
@@ -1065,7 +1065,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) {
RestartGcsServer();
node_id = ClientID::FromRandom();
node_id = NodeID::FromRandom();
task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
ASSERT_TRUE(AddTaskLease(task_lease));
WaitForExpectedCount(task_lease_count, 3);
@@ -1096,7 +1096,7 @@ TEST_F(ServiceBasedGcsClientTest, TestWorkerTableResubscribe) {
TEST_F(ServiceBasedGcsClientTest, TestGcsTableReload) {
ObjectID object_id = ObjectID::FromRandom();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
// Register node to GCS.
auto node_info = Mocker::GenNodeInfo();
+14 -14
View File
@@ -21,12 +21,12 @@
namespace ray {
namespace gcs {
ClientID GcsActor::GetNodeID() const {
NodeID GcsActor::GetNodeID() const {
const auto &raylet_id_binary = actor_table_data_.address().raylet_id();
if (raylet_id_binary.empty()) {
return ClientID::Nil();
return NodeID::Nil();
}
return ClientID::FromBinary(raylet_id_binary);
return NodeID::FromBinary(raylet_id_binary);
}
void GcsActor::UpdateAddress(const rpc::Address &address) {
@@ -47,8 +47,8 @@ WorkerID GcsActor::GetOwnerID() const {
return WorkerID::FromBinary(GetOwnerAddress().worker_id());
}
ClientID GcsActor::GetOwnerNodeID() const {
return ClientID::FromBinary(GetOwnerAddress().raylet_id());
NodeID GcsActor::GetOwnerNodeID() const {
return NodeID::FromBinary(GetOwnerAddress().raylet_id());
}
const rpc::Address &GcsActor::GetOwnerAddress() const {
@@ -425,7 +425,7 @@ Status GcsActorManager::RegisterActor(const ray::rpc::RegisterActorRequest &requ
RAY_CHECK(registered_actors_.emplace(actor->GetActorID(), actor).second);
const auto &owner_address = actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
RAY_CHECK(unresolved_actors_[node_id][worker_id].emplace(actor->GetActorID()).second);
@@ -649,7 +649,7 @@ void GcsActorManager::DestroyActor(const ActorID &actor_id) {
}
absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerNode(
const ClientID &node_id) const {
const NodeID &node_id) const {
absl::flat_hash_set<ActorID> actor_ids;
auto iter = unresolved_actors_.find(node_id);
if (iter != unresolved_actors_.end()) {
@@ -661,7 +661,7 @@ absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerNode(
}
absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerWorker(
const ClientID &node_id, const WorkerID &worker_id) const {
const NodeID &node_id, const WorkerID &worker_id) const {
absl::flat_hash_set<ActorID> actor_ids;
auto iter = unresolved_actors_.find(node_id);
if (iter != unresolved_actors_.end()) {
@@ -673,7 +673,7 @@ absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerWorker(
return actor_ids;
}
void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id,
void GcsActorManager::OnWorkerDead(const ray::NodeID &node_id,
const ray::WorkerID &worker_id,
bool intentional_exit) {
if (intentional_exit) {
@@ -727,7 +727,7 @@ void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id,
ReconstructActor(actor_id, /*need_reschedule=*/!intentional_exit);
}
void GcsActorManager::OnNodeDead(const ClientID &node_id) {
void GcsActorManager::OnNodeDead(const NodeID &node_id) {
RAY_LOG(WARNING) << "Node " << node_id << " failed, reconstructing actors.";
const auto it = owners_.find(node_id);
if (it != owners_.end()) {
@@ -913,7 +913,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) {
RAY_LOG(INFO) << "Loading initial data.";
auto callback = [this,
done](const std::unordered_map<ActorID, ActorTableData> &result) {
std::unordered_map<ClientID, std::vector<WorkerID>> node_to_workers;
std::unordered_map<NodeID, std::vector<WorkerID>> node_to_workers;
for (auto &item : result) {
if (item.second.state() != ray::rpc::ActorTableData::DEAD) {
auto actor = std::make_shared<GcsActor>(item.second);
@@ -925,7 +925,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) {
if (item.second.state() == ray::rpc::ActorTableData::DEPENDENCIES_UNREADY) {
const auto &owner = actor->GetOwnerAddress();
const auto &owner_node = ClientID::FromBinary(owner.raylet_id());
const auto &owner_node = NodeID::FromBinary(owner.raylet_id());
const auto &owner_worker = WorkerID::FromBinary(owner.worker_id());
RAY_CHECK(unresolved_actors_[owner_node][owner_worker]
.emplace(actor->GetActorID())
@@ -1026,7 +1026,7 @@ void GcsActorManager::OnJobFinished(const JobID &job_id) {
RAY_CHECK_OK(gcs_table_storage_->ActorTable().GetByJobId(job_id, on_done));
}
const absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>>
const absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>>
&GcsActorManager::GetCreatedActors() const {
return created_actors_;
}
@@ -1043,7 +1043,7 @@ const absl::flat_hash_map<ActorID, std::vector<RegisterActorCallback>>
void GcsActorManager::RemoveUnresolvedActor(const std::shared_ptr<GcsActor> &actor) {
const auto &owner_address = actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
auto iter = unresolved_actors_.find(node_id);
if (iter != unresolved_actors_.end()) {
+11 -12
View File
@@ -63,18 +63,18 @@ class GcsActor {
actor_table_data_.set_state(rpc::ActorTableData::DEPENDENCIES_UNREADY);
actor_table_data_.mutable_task_spec()->CopyFrom(task_spec);
actor_table_data_.mutable_address()->set_raylet_id(ClientID::Nil().Binary());
actor_table_data_.mutable_address()->set_raylet_id(NodeID::Nil().Binary());
actor_table_data_.mutable_address()->set_worker_id(WorkerID::Nil().Binary());
}
/// Get the node id on which this actor is created.
ClientID GetNodeID() const;
NodeID GetNodeID() const;
/// Get the id of the worker on which this actor is created.
WorkerID GetWorkerID() const;
/// Get the actor's owner ID.
WorkerID GetOwnerID() const;
/// Get the node ID of the actor's owner.
ClientID GetOwnerNodeID() const;
NodeID GetOwnerNodeID() const;
/// Get the address of the actor's owner.
const rpc::Address &GetOwnerAddress() const;
@@ -248,7 +248,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
/// owned an actor, those actors will be destroyed.
///
/// \param node_id The specified node id.
void OnNodeDead(const ClientID &node_id);
void OnNodeDead(const NodeID &node_id);
/// Handle a worker failure. This will restart the associated actor, if any,
/// which may be pending or already created. If the worker owned other
@@ -258,7 +258,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
/// \param worker_id ID of the dead worker.
/// \param intentional_exit Whether the death was intentional. If yes and the
/// worker was an actor, we should not attempt to restart the actor.
void OnWorkerDead(const ClientID &node_id, const WorkerID &worker_id,
void OnWorkerDead(const NodeID &node_id, const WorkerID &worker_id,
bool intentional_exit = false);
/// Handle actor creation task failure. This should be called when scheduling
@@ -288,7 +288,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
/// Get the created actors.
///
/// \return The created actors.
const absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>>
const absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>>
&GetCreatedActors() const;
const absl::flat_hash_map<ActorID, std::shared_ptr<GcsActor>> &GetRegisteredActors()
@@ -323,11 +323,11 @@ class GcsActorManager : public rpc::ActorInfoHandler {
/// Get unresolved actors that were submitted from the specified node.
absl::flat_hash_set<ActorID> GetUnresolvedActorsByOwnerNode(
const ClientID &node_id) const;
const NodeID &node_id) const;
/// Get unresolved actors that were submitted from the specified worker.
absl::flat_hash_set<ActorID> GetUnresolvedActorsByOwnerWorker(
const ClientID &node_id, const WorkerID &worker_id) const;
const NodeID &node_id, const WorkerID &worker_id) const;
private:
/// Reconstruct the specified actor.
@@ -372,18 +372,17 @@ class GcsActorManager : public rpc::ActorInfoHandler {
/// Maps from node ID to worker ID to the IDs of the unresolved actors owned by that
/// worker. An actor whose dependencies are not resolved should be destroyed once its
/// creator dies.
absl::flat_hash_map<ClientID,
absl::flat_hash_map<WorkerID, absl::flat_hash_set<ActorID>>>
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, absl::flat_hash_set<ActorID>>>
unresolved_actors_;
/// The pending actors which will not be scheduled until there's a resource change.
std::vector<std::shared_ptr<GcsActor>> pending_actors_;
/// Map contains the relationship of node and created actors. Each node ID
/// maps to a map from worker ID to the actor created on that worker.
absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>> created_actors_;
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>> created_actors_;
/// Map from node ID to worker ID to an owner entry holding a client and the IDs of
/// the actors owned by that worker. An owned actor should be destroyed once it has
/// gone out of scope, according to its owner, or the owner dies.
absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, Owner>> owners_;
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, Owner>> owners_;
/// The scheduler to schedule all registered actors.
std::shared_ptr<gcs::GcsActorSchedulerInterface> gcs_actor_scheduler_;
@@ -89,7 +89,7 @@ void GcsActorScheduler::Reschedule(std::shared_ptr<GcsActor> actor) {
}
}
std::vector<ActorID> GcsActorScheduler::CancelOnNode(const ClientID &node_id) {
std::vector<ActorID> GcsActorScheduler::CancelOnNode(const NodeID &node_id) {
// Remove all the actors from the map associated with this node, and return them as they
// will be reconstructed later.
std::vector<ActorID> actor_ids;
@@ -124,8 +124,7 @@ std::vector<ActorID> GcsActorScheduler::CancelOnNode(const ClientID &node_id) {
return actor_ids;
}
void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id,
const ActorID &actor_id) {
void GcsActorScheduler::CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) {
// NOTE: This method does not currently cancel the outstanding lease request.
// It only removes leasing information from the internal state so that
// RequestWorkerLease ignores the response from raylet.
@@ -134,7 +133,7 @@ void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id,
node_it->second.erase(actor_id);
}
ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id,
ActorID GcsActorScheduler::CancelOnWorker(const NodeID &node_id,
const WorkerID &worker_id) {
// Remove the worker from the creating map and return the ID of the actor associated
// with the removed worker if it exists, else return NilID.
@@ -156,7 +155,7 @@ ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id,
}
void GcsActorScheduler::ReleaseUnusedWorkers(
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) {
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) {
// The purpose of this function is to release leased workers that may be leaked.
// When GCS restarts, it doesn't know which workers it has leased in the previous
// lifecycle. In this case, GCS will send a list of worker ids that are still needed.
@@ -192,7 +191,7 @@ void GcsActorScheduler::LeaseWorkerFromNode(std::shared_ptr<GcsActor> actor,
std::shared_ptr<rpc::GcsNodeInfo> node) {
RAY_CHECK(actor && node);
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
RAY_LOG(INFO) << "Start leasing worker from node " << node_id << " for actor "
<< actor->GetActorID();
@@ -277,7 +276,7 @@ void GcsActorScheduler::HandleWorkerLeasedReply(
// The lease did not succeed on the specified node, but that node returned a new
// node to retry at, so try again on the new node.
RAY_CHECK(!retry_at_raylet_address.raylet_id().empty());
auto spill_back_node_id = ClientID::FromBinary(retry_at_raylet_address.raylet_id());
auto spill_back_node_id = NodeID::FromBinary(retry_at_raylet_address.raylet_id());
if (auto spill_back_node = gcs_node_manager_.GetNode(spill_back_node_id)) {
actor->UpdateAddress(retry_at_raylet_address);
RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()]
@@ -410,7 +409,7 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsActorScheduler::SelectNodeRandomly() const
std::shared_ptr<WorkerLeaseInterface> GcsActorScheduler::GetOrConnectLeaseClient(
const rpc::Address &raylet_address) {
auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
auto iter = remote_lease_clients_.find(node_id);
if (iter == remote_lease_clients_.end()) {
auto lease_client = lease_client_factory_(raylet_address);
+14 -15
View File
@@ -54,26 +54,26 @@ class GcsActorSchedulerInterface {
///
/// \param node_id ID of the node where the worker is located.
/// \return ID list of actors associated with the specified node id.
virtual std::vector<ActorID> CancelOnNode(const ClientID &node_id) = 0;
virtual std::vector<ActorID> CancelOnNode(const NodeID &node_id) = 0;
/// Cancel an outstanding leasing request to raylets.
///
/// \param node_id ID of the node where the actor leasing request has been sent.
/// \param actor_id ID of an actor.
virtual void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) = 0;
virtual void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) = 0;
/// Cancel the actor that is being scheduled to the specified worker.
///
/// \param node_id ID of the node where the worker is located.
/// \param worker_id ID of the worker that the actor is creating on.
/// \return ID of actor associated with the specified node id and worker id.
virtual ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) = 0;
virtual ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) = 0;
/// Notify raylets to release unused workers.
///
/// \param node_to_workers Workers used by each node.
virtual void ReleaseUnusedWorkers(
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) = 0;
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) = 0;
virtual ~GcsActorSchedulerInterface() {}
};
@@ -120,7 +120,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
///
/// \param node_id ID of the node where the worker is located.
/// \return ID list of actors associated with the specified node id.
std::vector<ActorID> CancelOnNode(const ClientID &node_id) override;
std::vector<ActorID> CancelOnNode(const NodeID &node_id) override;
/// Cancel an outstanding leasing request to raylets.
///
@@ -130,20 +130,20 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
///
/// \param node_id ID of the node where the actor leasing request has been sent.
/// \param actor_id ID of an actor.
void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) override;
void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) override;
/// Cancel the actor that is being scheduled to the specified worker.
///
/// \param node_id ID of the node where the worker is located.
/// \param worker_id ID of the worker that the actor is creating on.
/// \return ID of actor associated with the specified node id and worker id.
ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) override;
ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) override;
/// Notify raylets to release unused workers.
///
/// \param node_to_workers Workers used by each node.
void ReleaseUnusedWorkers(const std::unordered_map<ClientID, std::vector<WorkerID>>
&node_to_workers) override;
void ReleaseUnusedWorkers(
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) override;
protected:
/// The GcsLeasedWorker is kind of abstraction of remote leased worker inside raylet. It
@@ -178,7 +178,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
WorkerID GetWorkerID() const { return WorkerID::FromBinary(address_.worker_id()); }
/// Get the NodeID of this leased worker.
ClientID GetNodeID() const { return ClientID::FromBinary(address_.raylet_id()); }
NodeID GetNodeID() const { return NodeID::FromBinary(address_.raylet_id()); }
/// Get the id of the actor which is assigned to this leased worker.
ActorID GetAssignedActorID() const { return assigned_actor_id_; }
@@ -269,15 +269,14 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
/// Map from node ID to the set of actors for whom we are trying to acquire a lease from
/// that node. This is needed so that we can retry lease requests from the node until we
/// receive a reply or the node is removed.
absl::flat_hash_map<ClientID, absl::flat_hash_set<ActorID>>
node_to_actors_when_leasing_;
absl::flat_hash_map<NodeID, absl::flat_hash_set<ActorID>> node_to_actors_when_leasing_;
/// Map from node ID to the workers on which we are trying to create actors. This is
/// needed so that we can cancel actor creation requests if the worker is removed.
absl::flat_hash_map<ClientID,
absl::flat_hash_map<NodeID,
absl::flat_hash_map<WorkerID, std::shared_ptr<GcsLeasedWorker>>>
node_to_workers_when_creating_;
/// The cached node clients which are used to communicate with raylet to lease workers.
absl::flat_hash_map<ClientID, std::shared_ptr<WorkerLeaseInterface>>
absl::flat_hash_map<NodeID, std::shared_ptr<WorkerLeaseInterface>>
remote_lease_clients_;
/// Reference of GcsNodeManager.
const GcsNodeManager &gcs_node_manager_;
@@ -290,7 +289,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
/// Factory for producing new clients to request leases from remote nodes.
LeaseClientFactoryFn lease_client_factory_;
/// The nodes which are releasing unused workers.
absl::flat_hash_set<ClientID> nodes_of_releasing_unused_workers_;
absl::flat_hash_set<NodeID> nodes_of_releasing_unused_workers_;
/// The cached core worker clients which are used to communicate with leased worker.
rpc::CoreWorkerClientPool core_worker_clients_;
};
+18 -18
View File
@@ -25,7 +25,7 @@ GcsNodeManager::NodeFailureDetector::NodeFailureDetector(
boost::asio::io_service &io_service,
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage,
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub,
std::function<void(const ClientID &)> on_node_death_callback)
std::function<void(const NodeID &)> on_node_death_callback)
: gcs_table_storage_(std::move(gcs_table_storage)),
on_node_death_callback_(std::move(on_node_death_callback)),
num_heartbeats_timeout_(RayConfig::instance().num_heartbeats_timeout()),
@@ -40,12 +40,12 @@ void GcsNodeManager::NodeFailureDetector::Start() {
}
}
// Start tracking a node in the failure detector by storing `num_heartbeats_timeout_`
// for it in `heartbeats_`. `emplace` does not overwrite, so a node that is already
// tracked keeps its current remaining count.
void GcsNodeManager::NodeFailureDetector::AddNode(const ray::ClientID &node_id) {
void GcsNodeManager::NodeFailureDetector::AddNode(const ray::NodeID &node_id) {
heartbeats_.emplace(node_id, num_heartbeats_timeout_);
}
void GcsNodeManager::NodeFailureDetector::HandleHeartbeat(
const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat_data) {
const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat_data) {
auto iter = heartbeats_.find(node_id);
if (iter == heartbeats_.end()) {
// Ignore this heartbeat as the node is not registered.
@@ -145,7 +145,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service,
: main_io_service_(main_io_service),
node_failure_detector_(new NodeFailureDetector(
node_failure_detector_io_service, gcs_table_storage, gcs_pub_sub,
[this](const ClientID &node_id) {
[this](const NodeID &node_id) {
// Post this to main event loop to avoid potential concurrency issues.
main_io_service_.post([this, node_id] {
if (auto node = RemoveNode(node_id, /* is_intended = */ false)) {
@@ -171,7 +171,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service,
void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request,
rpc::RegisterNodeReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.node_info().node_id());
NodeID node_id = NodeID::FromBinary(request.node_info().node_id());
RAY_LOG(INFO) << "Registering node info, node id = " << node_id
<< ", address = " << request.node_info().node_manager_address();
AddNode(std::make_shared<rpc::GcsNodeInfo>(request.node_info()));
@@ -191,7 +191,7 @@ void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request,
void GcsNodeManager::HandleUnregisterNode(const rpc::UnregisterNodeRequest &request,
rpc::UnregisterNodeReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id;
if (auto node = RemoveNode(node_id, /* is_intended = */ true)) {
node->set_state(rpc::GcsNodeInfo::DEAD);
@@ -228,7 +228,7 @@ void GcsNodeManager::HandleGetAllNodeInfo(const rpc::GetAllNodeInfoRequest &requ
void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &request,
rpc::ReportHeartbeatReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.heartbeat().client_id());
NodeID node_id = NodeID::FromBinary(request.heartbeat().client_id());
auto heartbeat_data = std::make_shared<rpc::HeartbeatTableData>();
heartbeat_data->CopyFrom(request.heartbeat());
@@ -248,7 +248,7 @@ void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &re
void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request,
rpc::GetResourcesReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
auto iter = cluster_resources_.find(node_id);
if (iter != cluster_resources_.end()) {
for (auto &resource : iter->second.items()) {
@@ -261,7 +261,7 @@ void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request,
void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &request,
rpc::UpdateResourcesReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
RAY_LOG(DEBUG) << "Updating resources, node id = " << node_id;
auto iter = cluster_resources_.find(node_id);
auto to_be_updated_resources = request.resources();
@@ -298,7 +298,7 @@ void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &re
void GcsNodeManager::HandleDeleteResources(const rpc::DeleteResourcesRequest &request,
rpc::DeleteResourcesReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id;
auto resource_names = VectorFromProtobuf(request.resource_name_list());
auto iter = cluster_resources_.find(node_id);
@@ -355,7 +355,7 @@ void GcsNodeManager::HandleGetInternalConfig(const rpc::GetInternalConfigRequest
}
std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::GetNode(
const ray::ClientID &node_id) const {
const ray::NodeID &node_id) const {
auto iter = alive_nodes_.find(node_id);
if (iter == alive_nodes_.end()) {
return nullptr;
@@ -365,7 +365,7 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::GetNode(
}
void GcsNodeManager::AddNode(std::shared_ptr<rpc::GcsNodeInfo> node) {
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
auto iter = alive_nodes_.find(node_id);
if (iter == alive_nodes_.end()) {
alive_nodes_.emplace(node_id, node);
@@ -385,7 +385,7 @@ void GcsNodeManager::AddNode(std::shared_ptr<rpc::GcsNodeInfo> node) {
}
std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::RemoveNode(
const ray::ClientID &node_id, bool is_intended /*= false*/) {
const ray::NodeID &node_id, bool is_intended /*= false*/) {
RAY_LOG(INFO) << "Removing node, node id = " << node_id;
std::shared_ptr<rpc::GcsNodeInfo> removed_node;
auto iter = alive_nodes_.find(node_id);
@@ -423,8 +423,8 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::RemoveNode(
void GcsNodeManager::LoadInitialData(const EmptyCallback &done) {
RAY_LOG(INFO) << "Loading initial data.";
auto get_node_callback = [this, done](
const std::unordered_map<ClientID, GcsNodeInfo> &result) {
auto get_node_callback = [this,
done](const std::unordered_map<NodeID, GcsNodeInfo> &result) {
for (auto &item : result) {
if (item.second.state() == rpc::GcsNodeInfo::ALIVE) {
// Call `AddNode` for this node to make sure it is tracked by the failure
@@ -436,7 +436,7 @@ void GcsNodeManager::LoadInitialData(const EmptyCallback &done) {
}
auto get_node_resource_callback =
[this, done](const std::unordered_map<ClientID, ResourceMap> &result) {
[this, done](const std::unordered_map<NodeID, ResourceMap> &result) {
for (auto &item : result) {
if (alive_nodes_.count(item.first)) {
cluster_resources_[item.first] = item.second;
@@ -458,13 +458,13 @@ void GcsNodeManager::StartNodeFailureDetector() {
}
// Cache the resources a node reports as available in its heartbeat.
// The `resources_available` field of `heartbeat` is converted with `MapFromProtobuf`
// and wrapped in a newly allocated ResourceSet, which replaces whatever was stored
// for `node_id` in `cluster_realtime_resources_` (an entry is created if absent).
void GcsNodeManager::UpdateNodeRealtimeResources(
const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat) {
const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat) {
auto resources_available = MapFromProtobuf(heartbeat.resources_available());
cluster_realtime_resources_[node_id] =
std::make_shared<ResourceSet>(resources_available);
}
// Return a read-only reference to `cluster_realtime_resources_`, the per-node map
// filled in by `UpdateNodeRealtimeResources`. The map is returned by reference, not
// copied.
const absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>>
const absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>>
&GcsNodeManager::GetClusterRealtimeResources() const {
return cluster_realtime_resources_;
}
+16 -16
View File
@@ -99,20 +99,20 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
/// \param node_id The ID of the node to be removed.
/// \param is_intended False if this is triggered by `node_failure_detector_`, else
/// True.
std::shared_ptr<rpc::GcsNodeInfo> RemoveNode(const ClientID &node_id,
std::shared_ptr<rpc::GcsNodeInfo> RemoveNode(const NodeID &node_id,
bool is_intended = false);
/// Get alive node by ID.
///
/// \param node_id The id of the node.
/// \return the node if it is alive else return nullptr.
std::shared_ptr<rpc::GcsNodeInfo> GetNode(const ClientID &node_id) const;
std::shared_ptr<rpc::GcsNodeInfo> GetNode(const NodeID &node_id) const;
/// Get all alive nodes.
///
/// \return all alive nodes.
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>>
&GetAllAliveNodes() const {
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &GetAllAliveNodes()
const {
return alive_nodes_;
}
@@ -144,11 +144,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
void StartNodeFailureDetector();
// Update node realtime resources.
void UpdateNodeRealtimeResources(const ClientID &node_id,
void UpdateNodeRealtimeResources(const NodeID &node_id,
const rpc::HeartbeatTableData &heartbeat);
/// Get cluster realtime resources.
const absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>>
const absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>>
&GetClusterRealtimeResources() const;
protected:
@@ -165,7 +165,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
boost::asio::io_service &io_service,
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage,
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub,
std::function<void(const ClientID &)> on_node_death_callback);
std::function<void(const NodeID &)> on_node_death_callback);
// Note: To avoid heartbeats being delayed by main thread, all public methods below
// should be posted to its own IO service.
@@ -177,13 +177,13 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
/// Only if the node has registered, its heartbeat data will be accepted.
///
/// \param node_id ID of the node to be registered.
void AddNode(const ClientID &node_id);
void AddNode(const NodeID &node_id);
/// Handle a heartbeat from a Raylet.
///
/// \param node_id The node ID of the Raylet that sent the heartbeat.
/// \param heartbeat_data The heartbeat sent by the Raylet.
void HandleHeartbeat(const ClientID &node_id,
void HandleHeartbeat(const NodeID &node_id,
const rpc::HeartbeatTableData &heartbeat_data);
protected:
@@ -206,7 +206,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
/// Storage for GCS tables.
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
/// The callback of node death.
std::function<void(const ClientID &)> on_node_death_callback_;
std::function<void(const NodeID &)> on_node_death_callback_;
/// The number of heartbeats that can be missed before a node is removed.
int64_t num_heartbeats_timeout_;
// Only the changed part will be included in heartbeat if this is true.
@@ -215,9 +215,9 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
boost::asio::deadline_timer detect_timer_;
/// For each Raylet that we receive a heartbeat from, the number of ticks
/// that may pass before the Raylet will be declared dead.
absl::flat_hash_map<ClientID, int64_t> heartbeats_;
absl::flat_hash_map<NodeID, int64_t> heartbeats_;
/// A buffer containing heartbeats received from node managers in the last tick.
absl::flat_hash_map<ClientID, rpc::HeartbeatTableData> heartbeat_buffer_;
absl::flat_hash_map<NodeID, rpc::HeartbeatTableData> heartbeat_buffer_;
/// A publisher for publishing gcs messages.
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
/// Is the detect started.
@@ -232,11 +232,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
/// The event loop for node failure detector.
boost::asio::io_service &node_failure_detector_service_;
/// Alive nodes.
absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> alive_nodes_;
absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> alive_nodes_;
/// Dead nodes.
absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> dead_nodes_;
absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> dead_nodes_;
/// Cluster resources.
absl::flat_hash_map<ClientID, rpc::ResourceMap> cluster_resources_;
absl::flat_hash_map<NodeID, rpc::ResourceMap> cluster_resources_;
/// Listeners which monitor the addition of nodes.
std::vector<std::function<void(std::shared_ptr<rpc::GcsNodeInfo>)>>
node_added_listeners_;
@@ -248,7 +248,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
/// Storage for GCS tables.
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
/// Cluster realtime resources.
absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>> cluster_realtime_resources_;
absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>> cluster_realtime_resources_;
};
} // namespace gcs
+11 -11
View File
@@ -60,7 +60,7 @@ void GcsObjectManager::HandleAddObjectLocation(
const rpc::AddObjectLocationRequest &request, rpc::AddObjectLocationReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ObjectID object_id = ObjectID::FromBinary(request.object_id());
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
RAY_LOG(DEBUG) << "Adding object location, job id = " << object_id.TaskId().JobId()
<< ", object id = " << object_id << ", node id = " << node_id;
AddObjectLocationInCache(object_id, node_id);
@@ -100,7 +100,7 @@ void GcsObjectManager::HandleRemoveObjectLocation(
const rpc::RemoveObjectLocationRequest &request,
rpc::RemoveObjectLocationReply *reply, rpc::SendReplyCallback send_reply_callback) {
ObjectID object_id = ObjectID::FromBinary(request.object_id());
ClientID node_id = ClientID::FromBinary(request.node_id());
NodeID node_id = NodeID::FromBinary(request.node_id());
RAY_LOG(DEBUG) << "Removing object location, job id = " << object_id.TaskId().JobId()
<< ", object id = " << object_id << ", node id = " << node_id;
RemoveObjectLocationInCache(object_id, node_id);
@@ -143,7 +143,7 @@ void GcsObjectManager::HandleRemoveObjectLocation(
}
void GcsObjectManager::AddObjectsLocation(
const ClientID &node_id, const absl::flat_hash_set<ObjectID> &object_ids) {
const NodeID &node_id, const absl::flat_hash_set<ObjectID> &object_ids) {
// TODO(micafan) Optimize the lock when necessary.
// Maybe use read/write lock. Or reduce the granularity of the lock.
absl::MutexLock lock(&mutex_);
@@ -159,7 +159,7 @@ void GcsObjectManager::AddObjectsLocation(
}
void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id,
const ClientID &node_id) {
const NodeID &node_id) {
absl::MutexLock lock(&mutex_);
auto *objects_on_node = GetObjectSetByNode(node_id, /* create_if_not_exist */ true);
@@ -170,7 +170,7 @@ void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id,
object_locations->emplace(node_id);
}
absl::flat_hash_set<ClientID> GcsObjectManager::GetObjectLocations(
absl::flat_hash_set<NodeID> GcsObjectManager::GetObjectLocations(
const ObjectID &object_id) {
absl::MutexLock lock(&mutex_);
@@ -178,10 +178,10 @@ absl::flat_hash_set<ClientID> GcsObjectManager::GetObjectLocations(
if (object_locations) {
return *object_locations;
}
return absl::flat_hash_set<ClientID>{};
return absl::flat_hash_set<NodeID>{};
}
void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) {
void GcsObjectManager::OnNodeRemoved(const NodeID &node_id) {
absl::MutexLock lock(&mutex_);
ObjectSet objects_on_node;
@@ -207,7 +207,7 @@ void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) {
}
void GcsObjectManager::RemoveObjectLocationInCache(const ObjectID &object_id,
const ClientID &node_id) {
const NodeID &node_id) {
absl::MutexLock lock(&mutex_);
auto *object_locations = GetObjectLocationSet(object_id);
@@ -244,7 +244,7 @@ GcsObjectManager::LocationSet *GcsObjectManager::GetObjectLocationSet(
}
GcsObjectManager::ObjectSet *GcsObjectManager::GetObjectSetByNode(
const ClientID &node_id, bool create_if_not_exist) {
const NodeID &node_id, bool create_if_not_exist) {
ObjectSet *objects_on_node = nullptr;
auto it = node_to_objects_.find(node_id);
@@ -271,11 +271,11 @@ void GcsObjectManager::LoadInitialData(const EmptyCallback &done) {
RAY_LOG(INFO) << "Loading initial data.";
auto callback = [this, done](
const std::unordered_map<ObjectID, ObjectTableDataList> &result) {
absl::flat_hash_map<ClientID, ObjectSet> node_to_objects;
absl::flat_hash_map<NodeID, ObjectSet> node_to_objects;
for (auto &item : result) {
auto object_list = item.second;
for (int index = 0; index < object_list.items_size(); ++index) {
node_to_objects[ClientID::FromBinary(object_list.items(index).manager())].insert(
node_to_objects[NodeID::FromBinary(object_list.items(index).manager())].insert(
item.first);
}
}
+8 -8
View File
@@ -33,7 +33,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
[this](const std::shared_ptr<rpc::GcsNodeInfo> &node) {
// All of the related actors should be reconstructed when a node is removed from
// the GCS.
OnNodeRemoved(ClientID::FromBinary(node->node_id()));
OnNodeRemoved(NodeID::FromBinary(node->node_id()));
});
}
@@ -60,14 +60,14 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
void LoadInitialData(const EmptyCallback &done);
protected:
typedef absl::flat_hash_set<ClientID> LocationSet;
typedef absl::flat_hash_set<NodeID> LocationSet;
/// Add a location of objects.
/// If the GCS server restarts, this function is used to reload data from storage.
///
/// \param node_id The object location that will be added.
/// \param object_ids The ids of objects whose location will be added.
void AddObjectsLocation(const ClientID &node_id,
void AddObjectsLocation(const NodeID &node_id,
const absl::flat_hash_set<ObjectID> &object_ids)
LOCKS_EXCLUDED(mutex_);
@@ -75,7 +75,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
///
/// \param object_id The id of object.
/// \param node_id The node id of the new location.
void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id)
void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id)
LOCKS_EXCLUDED(mutex_);
/// Get all locations of the given object.
@@ -87,13 +87,13 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
/// Handler if a node is removed.
///
/// \param node_id The node that will be removed.
void OnNodeRemoved(const ClientID &node_id) LOCKS_EXCLUDED(mutex_);
void OnNodeRemoved(const NodeID &node_id) LOCKS_EXCLUDED(mutex_);
/// Remove object's location.
///
/// \param object_id The id of the object whose location will be removed.
/// \param node_id The location that will be removed.
void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id)
void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id)
LOCKS_EXCLUDED(mutex_);
private:
@@ -118,7 +118,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
/// \param node_id The id of node to lookup.
/// \param create_if_not_exist Whether to create a new one if not exist.
/// \return ObjectSet *
GcsObjectManager::ObjectSet *GetObjectSetByNode(const ClientID &node_id,
GcsObjectManager::ObjectSet *GetObjectSetByNode(const NodeID &node_id,
bool create_if_not_exist = false)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -130,7 +130,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
/// Mapping from node id to objects that are held by the node.
/// This is the local cache of nodes' objects in the storage.
absl::flat_hash_map<ClientID, ObjectSet> node_to_objects_ GUARDED_BY(mutex_);
absl::flat_hash_map<NodeID, ObjectSet> node_to_objects_ GUARDED_BY(mutex_);
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
@@ -53,7 +53,7 @@ std::vector<std::shared_ptr<BundleSpecification>> GcsPlacementGroup::GetUnplaced
const auto &bundles = placement_group_table_data_.bundles();
std::vector<std::shared_ptr<BundleSpecification>> unplaced_bundles;
for (auto &bundle : bundles) {
if (ClientID::FromBinary(bundle.node_id()).IsNil()) {
if (NodeID::FromBinary(bundle.node_id()).IsNil()) {
unplaced_bundles.push_back(std::make_shared<BundleSpecification>(bundle));
}
}
@@ -326,7 +326,7 @@ void GcsPlacementGroupManager::RetryCreatingPlacementGroup() {
RayConfig::instance().gcs_create_placement_group_retry_interval_ms());
}
void GcsPlacementGroupManager::OnNodeDead(const ClientID &node_id) {
void GcsPlacementGroupManager::OnNodeDead(const NodeID &node_id) {
RAY_LOG(WARNING) << "Node " << node_id
<< " failed, rescheduling the placement groups on the dead node.";
auto bundles = gcs_placement_group_scheduler_->GetBundlesOnNode(node_id);
@@ -167,7 +167,7 @@ class GcsPlacementGroupManager : public rpc::PlacementGroupInfoHandler {
/// specified node id.
///
/// \param node_id The specified node id.
void OnNodeDead(const ClientID &node_id);
void OnNodeDead(const NodeID &node_id);
private:
/// Try to create placement group after a short time.
@@ -46,7 +46,7 @@ ScheduleMap GcsStrictPackStrategy::Schedule(
// Filter candidate nodes.
const auto &alive_nodes = context->node_manager_.GetClusterRealtimeResources();
std::vector<std::pair<int64_t, ClientID>> candidate_nodes;
std::vector<std::pair<int64_t, NodeID>> candidate_nodes;
for (auto &node : alive_nodes) {
if (required_resources.IsSubset(*node.second)) {
candidate_nodes.emplace_back((*context->node_to_bundles_)[node.first], node.first);
@@ -61,8 +61,8 @@ ScheduleMap GcsStrictPackStrategy::Schedule(
std::sort(
std::begin(candidate_nodes), std::end(candidate_nodes),
[](const std::pair<int64_t, ClientID> &left,
const std::pair<int64_t, ClientID> &right) { return left.first < right.first; });
[](const std::pair<int64_t, NodeID> &left,
const std::pair<int64_t, NodeID> &right) { return left.first < right.first; });
for (auto &bundle : bundles) {
schedule_map[bundle->BundleId()] = candidate_nodes.front().second;
@@ -287,7 +287,7 @@ void GcsPlacementGroupScheduler::PrepareResources(
const std::shared_ptr<BundleSpecification> &bundle,
const std::shared_ptr<ray::rpc::GcsNodeInfo> &node, const StatusCallback &callback) {
const auto lease_client = GetLeaseClientFromNode(node);
const auto node_id = ClientID::FromBinary(node->node_id());
const auto node_id = NodeID::FromBinary(node->node_id());
RAY_LOG(INFO) << "Preparing resource from node " << node_id
<< " for a bundle: " << bundle->DebugString();
lease_client->PrepareBundleResources(
@@ -311,7 +311,7 @@ void GcsPlacementGroupScheduler::CommitResources(
const std::shared_ptr<ray::rpc::GcsNodeInfo> &node, const StatusCallback callback) {
RAY_CHECK(node != nullptr);
const auto lease_client = GetLeaseClientFromNode(node);
const auto node_id = ClientID::FromBinary(node->node_id());
const auto node_id = NodeID::FromBinary(node->node_id());
RAY_LOG(INFO) << "Committing resource to a node " << node_id
<< " for a bundle: " << bundle->DebugString();
lease_client->CommitBundleResources(
@@ -339,7 +339,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve(
<< " has already removed. Cancellation request will be ignored.";
return;
}
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
RAY_LOG(INFO) << "Cancelling the resource reserved for bundle: "
<< bundle_spec->DebugString() << " at node " << node_id;
const auto return_client = GetLeaseClientFromNode(node);
@@ -353,7 +353,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve(
std::shared_ptr<ResourceReserveInterface>
GcsPlacementGroupScheduler::GetOrConnectLeaseClient(const rpc::Address &raylet_address) {
auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
auto iter = remote_lease_clients_.find(node_id);
if (iter == remote_lease_clients_.end()) {
auto lease_client = lease_client_factory_(raylet_address);
@@ -499,7 +499,7 @@ std::unique_ptr<ScheduleContext> GcsPlacementGroupScheduler::GetScheduleContext(
auto &alive_nodes = gcs_node_manager_.GetAllAliveNodes();
committed_bundle_location_index_.AddNodes(alive_nodes);
auto node_to_bundles = std::make_shared<absl::flat_hash_map<ClientID, int64_t>>();
auto node_to_bundles = std::make_shared<absl::flat_hash_map<NodeID, int64_t>>();
for (const auto &node_it : alive_nodes) {
const auto &node_id = node_it.first;
const auto &bundle_locations_on_node =
@@ -517,7 +517,7 @@ std::unique_ptr<ScheduleContext> GcsPlacementGroupScheduler::GetScheduleContext(
}
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>>
GcsPlacementGroupScheduler::GetBundlesOnNode(const ClientID &node_id) {
GcsPlacementGroupScheduler::GetBundlesOnNode(const NodeID &node_id) {
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> bundles_on_node;
const auto &maybe_bundle_locations =
committed_bundle_location_index_.GetBundleLocationsOnNode(node_id);
@@ -546,7 +546,7 @@ void BundleLocationIndex::AddBundleLocations(
}
}
bool BundleLocationIndex::Erase(const ClientID &node_id) {
bool BundleLocationIndex::Erase(const NodeID &node_id) {
const auto leased_bundles_it = node_to_leased_bundles_.find(node_id);
if (leased_bundles_it == node_to_leased_bundles_.end()) {
return false;
@@ -604,7 +604,7 @@ BundleLocationIndex::GetBundleLocations(const PlacementGroupID &placement_group_
}
const absl::optional<std::shared_ptr<BundleLocations> const>
BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) {
BundleLocationIndex::GetBundleLocationsOnNode(const NodeID &node_id) {
auto it = node_to_leased_bundles_.find(node_id);
if (it == node_to_leased_bundles_.end()) {
return {};
@@ -613,7 +613,7 @@ BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) {
}
void BundleLocationIndex::AddNodes(
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes) {
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes) {
for (const auto &iter : nodes) {
if (!node_to_leased_bundles_.contains(iter.first)) {
node_to_leased_bundles_[iter.first] = std::make_shared<BundleLocations>();
@@ -630,13 +630,13 @@ LeaseStatusTracker::LeaseStatusTracker(
}
bool LeaseStatusTracker::MarkPreparePhaseStarted(
const ClientID &node_id, std::shared_ptr<BundleSpecification> bundle) {
const NodeID &node_id, std::shared_ptr<BundleSpecification> bundle) {
const auto &bundle_id = bundle->BundleId();
return node_to_bundles_when_preparing_[node_id].emplace(bundle_id).second;
}
void LeaseStatusTracker::MarkPrepareRequestReturned(
const ClientID &node_id, const std::shared_ptr<BundleSpecification> bundle,
const NodeID &node_id, const std::shared_ptr<BundleSpecification> bundle,
const Status &status) {
RAY_CHECK(prepare_request_returned_count_ <= bundles_to_schedule_.size());
auto leasing_bundles = node_to_bundles_when_preparing_.find(node_id);
@@ -670,7 +670,7 @@ bool LeaseStatusTracker::AllPrepareRequestsSuccessful() const {
}
void LeaseStatusTracker::MarkCommitRequestReturned(
const ClientID &node_id, const std::shared_ptr<BundleSpecification> bundle,
const NodeID &node_id, const std::shared_ptr<BundleSpecification> bundle,
const Status &status) {
commit_request_returned_count_ += 1;
// If the request succeeds, record it.
@@ -37,9 +37,10 @@ struct pair_hash {
return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
}
};
using ScheduleMap = std::unordered_map<BundleID, ClientID, pair_hash>;
using BundleLocations = absl::flat_hash_map<
BundleID, std::pair<ClientID, std::shared_ptr<BundleSpecification>>, pair_hash>;
using ScheduleMap = std::unordered_map<BundleID, NodeID, pair_hash>;
using BundleLocations =
absl::flat_hash_map<BundleID, std::pair<NodeID, std::shared_ptr<BundleSpecification>>,
pair_hash>;
class GcsPlacementGroup;
@@ -60,7 +61,7 @@ class GcsPlacementGroupSchedulerInterface {
/// \param node_id ID of the dead node.
/// \return The bundles that belong to the dead node.
virtual absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
const ClientID &node_id) = 0;
const NodeID &node_id) = 0;
/// Destroy bundle resources from all nodes in the placement group.
virtual void DestroyPlacementGroupBundleResourcesIfExists(
@@ -79,7 +80,7 @@ class GcsPlacementGroupSchedulerInterface {
/// ScheduleContext provides the information that is needed for bundle scheduling decisions.
class ScheduleContext {
public:
ScheduleContext(std::shared_ptr<absl::flat_hash_map<ClientID, int64_t>> node_to_bundles,
ScheduleContext(std::shared_ptr<absl::flat_hash_map<NodeID, int64_t>> node_to_bundles,
const absl::optional<std::shared_ptr<BundleLocations>> bundle_locations,
const GcsNodeManager &node_manager)
: node_to_bundles_(std::move(node_to_bundles)),
@@ -87,7 +88,7 @@ class ScheduleContext {
node_manager_(node_manager) {}
// Key is node id, value is the number of bundles on the node.
const std::shared_ptr<absl::flat_hash_map<ClientID, int64_t>> node_to_bundles_;
const std::shared_ptr<absl::flat_hash_map<NodeID, int64_t>> node_to_bundles_;
// The locations of existing bundles for this placement group.
const absl::optional<std::shared_ptr<BundleLocations>> bundle_locations_;
@@ -158,7 +159,7 @@ class LeaseStatusTracker {
/// \param node_id Id of a node where prepare request is sent.
/// \param bundle Bundle specification the node is supposed to prepare.
/// \return False if the prepare phase was already started. True otherwise.
bool MarkPreparePhaseStarted(const ClientID &node_id,
bool MarkPreparePhaseStarted(const NodeID &node_id,
std::shared_ptr<BundleSpecification> bundle);
/// Indicate the tracker that all prepare requests are returned.
@@ -167,7 +168,7 @@ class LeaseStatusTracker {
/// \param bundle Bundle specification the node was supposed to schedule.
/// \param status Status of the prepare response.
/// \return void
void MarkPrepareRequestReturned(const ClientID &node_id,
void MarkPrepareRequestReturned(const NodeID &node_id,
std::shared_ptr<BundleSpecification> bundle,
const Status &status);
@@ -186,7 +187,7 @@ class LeaseStatusTracker {
/// \param node_id Id of a node where commit request is returned.
/// \param bundle Bundle specification the node was supposed to schedule.
/// \param status Status of the returned commit request.
void MarkCommitRequestReturned(const ClientID &node_id,
void MarkCommitRequestReturned(const NodeID &node_id,
const std::shared_ptr<BundleSpecification> bundle,
const Status &status);
@@ -245,7 +246,7 @@ class LeaseStatusTracker {
/// Locations of the bundles for which prepare requests were sent.
/// If prepare succeeds, the decision will be set as schedule_map[bundles[pos]]
/// else will be set ClientID::Nil().
/// else will be set NodeID::Nil().
std::shared_ptr<BundleLocations> preparing_bundle_locations_;
/// Number of prepare requests that are returned.
@@ -264,7 +265,7 @@ class LeaseStatusTracker {
/// from that node. This is needed so that we can retry lease requests from the node
/// until we receive a reply or the node is removed.
/// TODO(sang): We don't currently handle retry.
absl::flat_hash_map<ClientID, absl::flat_hash_set<BundleID>>
absl::flat_hash_map<NodeID, absl::flat_hash_set<BundleID>>
node_to_bundles_when_preparing_;
/// Bundles to schedule.
@@ -289,7 +290,7 @@ class BundleLocationIndex {
///
/// \param node_id The id of node.
/// \return True if succeed. False otherwise.
bool Erase(const ClientID &node_id);
bool Erase(const NodeID &node_id);
/// Erase bundle locations associated with a given placement group id.
///
@@ -309,19 +310,19 @@ class BundleLocationIndex {
/// \param node_id Node id of this bundle locations.
/// \return Bundle locations that are associated with a given node id.
const absl::optional<std::shared_ptr<BundleLocations> const> GetBundleLocationsOnNode(
const ClientID &node_id);
const NodeID &node_id);
/// Update the index to contain new node information. Should be used only when new node
/// is added to the cluster.
///
/// \param nodes Map of alive nodes.
void AddNodes(
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes);
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes);
private:
/// Map from node ID to the set of bundles. This is used to lookup bundles at each node
/// when a node is dead.
absl::flat_hash_map<ClientID, std::shared_ptr<BundleLocations>> node_to_leased_bundles_;
absl::flat_hash_map<NodeID, std::shared_ptr<BundleLocations>> node_to_leased_bundles_;
/// A map from placement group id to bundle locations.
/// It is used to destroy bundles for the placement group.
@@ -382,7 +383,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface {
/// \param node_id ID of the dead node.
/// \return The bundles that belong to the dead node.
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
const ClientID &node_id) override;
const NodeID &node_id) override;
protected:
/// Send a bundle PREPARE request to a node. The PREPARE request will lock resources
@@ -459,7 +460,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface {
const GcsNodeManager &gcs_node_manager_;
/// The cached node clients which are used to communicate with raylet to lease workers.
absl::flat_hash_map<ClientID, std::shared_ptr<ResourceReserveInterface>>
absl::flat_hash_map<NodeID, std::shared_ptr<ResourceReserveInterface>>
remote_lease_clients_;
/// Factory for producing new clients to request leases from remote nodes.
+3 -3
View File
@@ -206,8 +206,8 @@ void GcsServer::InitGcsActorManager() {
[this](std::shared_ptr<rpc::GcsNodeInfo> node) {
// All of the related placement groups and actors should be reconstructed when a
// node is removed from the GCS.
gcs_placement_group_manager_->OnNodeDead(ClientID::FromBinary(node->node_id()));
gcs_actor_manager_->OnNodeDead(ClientID::FromBinary(node->node_id()));
gcs_placement_group_manager_->OnNodeDead(NodeID::FromBinary(node->node_id()));
gcs_actor_manager_->OnNodeDead(NodeID::FromBinary(node->node_id()));
});
auto on_subscribe = [this](const std::string &id, const std::string &data) {
@@ -215,7 +215,7 @@ void GcsServer::InitGcsActorManager() {
worker_failure_data.ParseFromString(data);
auto &worker_address = worker_failure_data.worker_address();
WorkerID worker_id = WorkerID::FromBinary(id);
ClientID node_id = ClientID::FromBinary(worker_address.raylet_id());
NodeID node_id = NodeID::FromBinary(worker_address.raylet_id());
gcs_actor_manager_->OnWorkerDead(node_id, worker_id,
worker_failure_data.intentional_disconnect());
};
+4 -4
View File
@@ -127,10 +127,10 @@ Status GcsTableWithJobId<Key, Data>::BatchDelete(const std::vector<Key> &keys,
}
template class GcsTable<JobID, JobTableData>;
template class GcsTable<ClientID, GcsNodeInfo>;
template class GcsTable<ClientID, ResourceMap>;
template class GcsTable<ClientID, HeartbeatTableData>;
template class GcsTable<ClientID, HeartbeatBatchTableData>;
template class GcsTable<NodeID, GcsNodeInfo>;
template class GcsTable<NodeID, ResourceMap>;
template class GcsTable<NodeID, HeartbeatTableData>;
template class GcsTable<NodeID, HeartbeatBatchTableData>;
template class GcsTable<JobID, ErrorTableData>;
template class GcsTable<UniqueID, ProfileTableData>;
template class GcsTable<WorkerID, WorkerTableData>;
+4 -4
View File
@@ -245,7 +245,7 @@ class GcsObjectTable : public GcsTableWithJobId<ObjectID, ObjectTableDataList> {
/// Returns the job ID of the task ID reported by the given object key.
///
/// \param key The object ID used as the table key.
/// \return The result of `key.TaskId().JobId()`.
JobID GetJobIdFromKey(const ObjectID &key) override {
  auto task_id = key.TaskId();
  return task_id.JobId();
}
};
class GcsNodeTable : public GcsTable<ClientID, GcsNodeInfo> {
class GcsNodeTable : public GcsTable<NodeID, GcsNodeInfo> {
public:
explicit GcsNodeTable(std::shared_ptr<StoreClient> &store_client)
: GcsTable(store_client) {
@@ -253,7 +253,7 @@ class GcsNodeTable : public GcsTable<ClientID, GcsNodeInfo> {
}
};
class GcsNodeResourceTable : public GcsTable<ClientID, ResourceMap> {
class GcsNodeResourceTable : public GcsTable<NodeID, ResourceMap> {
public:
explicit GcsNodeResourceTable(std::shared_ptr<StoreClient> &store_client)
: GcsTable(store_client) {
@@ -261,7 +261,7 @@ class GcsNodeResourceTable : public GcsTable<ClientID, ResourceMap> {
}
};
class GcsHeartbeatTable : public GcsTable<ClientID, HeartbeatTableData> {
class GcsHeartbeatTable : public GcsTable<NodeID, HeartbeatTableData> {
public:
explicit GcsHeartbeatTable(std::shared_ptr<StoreClient> &store_client)
: GcsTable(store_client) {
@@ -277,7 +277,7 @@ class GcsPlacementGroupScheduleTable : public GcsTable<PlacementGroupID, Schedul
}
};
class GcsHeartbeatBatchTable : public GcsTable<ClientID, HeartbeatBatchTableData> {
class GcsHeartbeatBatchTable : public GcsTable<NodeID, HeartbeatBatchTableData> {
public:
explicit GcsHeartbeatBatchTable(std::shared_ptr<StoreClient> &store_client)
: GcsTable(store_client) {
+1 -1
View File
@@ -22,7 +22,7 @@ void GcsWorkerManager::HandleReportWorkerFailure(
rpc::SendReplyCallback send_reply_callback) {
const rpc::Address worker_address = request.worker_failure().worker_address();
const auto worker_id = WorkerID::FromBinary(worker_address.worker_id());
const auto node_id = ClientID::FromBinary(worker_address.raylet_id());
const auto node_id = NodeID::FromBinary(worker_address.raylet_id());
std::stringstream log_stream;
log_stream << "Reporting worker failure, worker id = " << worker_id
<< ", node id = " << node_id
+2 -1
View File
@@ -13,6 +13,7 @@
// limitations under the License.
#include "ray/gcs/gcs_server/stats_handler_impl.h"
#include "ray/common/ray_config.h"
namespace ray {
@@ -21,7 +22,7 @@ namespace rpc {
void DefaultStatsHandler::HandleAddProfileData(const AddProfileDataRequest &request,
AddProfileDataReply *reply,
SendReplyCallback send_reply_callback) {
ClientID node_id = ClientID::FromBinary(request.profile_data().component_id());
NodeID node_id = NodeID::FromBinary(request.profile_data().component_id());
RAY_LOG(DEBUG) << "Adding profile data, component type = "
<< request.profile_data().component_type() << ", node id = " << node_id;
auto profile_table_data = std::make_shared<ProfileTableData>();
@@ -93,7 +93,7 @@ void DefaultTaskInfoHandler::HandleAddTaskLease(const AddTaskLeaseRequest &reque
AddTaskLeaseReply *reply,
SendReplyCallback send_reply_callback) {
TaskID task_id = TaskID::FromBinary(request.task_lease_data().task_id());
ClientID node_id = ClientID::FromBinary(request.task_lease_data().node_manager_id());
NodeID node_id = NodeID::FromBinary(request.task_lease_data().node_manager_id());
RAY_LOG(DEBUG) << "Adding task lease, job id = " << task_id.JobId()
<< ", task id = " << task_id << ", node id = " << node_id;
auto on_done = [this, task_id, node_id, request, reply,
@@ -144,8 +144,7 @@ void DefaultTaskInfoHandler::HandleAttemptTaskReconstruction(
const AttemptTaskReconstructionRequest &request,
AttemptTaskReconstructionReply *reply, SendReplyCallback send_reply_callback) {
TaskID task_id = TaskID::FromBinary(request.task_reconstruction().task_id());
ClientID node_id =
ClientID::FromBinary(request.task_reconstruction().node_manager_id());
NodeID node_id = NodeID::FromBinary(request.task_reconstruction().node_manager_id());
RAY_LOG(DEBUG) << "Reconstructing task, job id = " << task_id.JobId()
<< ", task id = " << task_id << ", reconstructions num = "
<< request.task_reconstruction().num_reconstructions()
@@ -31,12 +31,11 @@ class MockActorScheduler : public gcs::GcsActorSchedulerInterface {
/// Mock implementation: appends the actor to `actors` so that tests can
/// inspect which actors were handed to the scheduler.
///
/// \param actor The actor passed in for scheduling.
void Schedule(std::shared_ptr<gcs::GcsActor> actor) {
  actors.emplace_back(actor);
}
/// No-op in this mock: the actor is neither recorded nor rescheduled.
///
/// \param actor The actor passed in for rescheduling (unused).
void Reschedule(std::shared_ptr<gcs::GcsActor> actor) {}
void ReleaseUnusedWorkers(
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) {}
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) {}
MOCK_METHOD1(CancelOnNode, std::vector<ActorID>(const ClientID &node_id));
MOCK_METHOD2(CancelOnWorker,
ActorID(const ClientID &node_id, const WorkerID &worker_id));
MOCK_METHOD2(CancelOnLeasing, void(const ClientID &node_id, const ActorID &actor_id));
MOCK_METHOD1(CancelOnNode, std::vector<ActorID>(const NodeID &node_id));
MOCK_METHOD2(CancelOnWorker, ActorID(const NodeID &node_id, const WorkerID &worker_id));
MOCK_METHOD2(CancelOnLeasing, void(const NodeID &node_id, const ActorID &actor_id));
std::vector<std::shared_ptr<gcs::GcsActor>> actors;
};
@@ -121,7 +120,7 @@ class GcsActorManagerTest : public ::testing::Test {
rpc::Address RandomAddress() const {
rpc::Address address;
auto node_id = ClientID::FromRandom();
auto node_id = NodeID::FromRandom();
auto worker_id = WorkerID::FromRandom();
address.set_raylet_id(node_id.Binary());
address.set_worker_id(worker_id.Binary());
@@ -238,7 +237,7 @@ TEST_F(GcsActorManagerTest, TestWorkerFailure) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
auto worker_id = WorkerID::FromBinary(address.worker_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
@@ -281,7 +280,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
WaitActorCreated(actor->GetActorID());
@@ -289,7 +288,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) {
// Killing another node does not affect this actor.
EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_));
gcs_actor_manager_->OnNodeDead(ClientID::FromRandom());
gcs_actor_manager_->OnNodeDead(NodeID::FromRandom());
ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE);
// Remove node and then check that the actor is dead.
@@ -325,7 +324,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
WaitActorCreated(actor->GetActorID());
@@ -341,7 +340,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
ASSERT_EQ(mock_actor_scheduler_->actors.size(), 1);
mock_actor_scheduler_->actors.clear();
ASSERT_EQ(finished_actors.size(), 1);
auto node_id2 = ClientID::FromRandom();
auto node_id2 = NodeID::FromRandom();
address.set_raylet_id(node_id2.Binary());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
@@ -352,7 +351,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
// Killing another node does not affect this actor.
EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_));
gcs_actor_manager_->OnNodeDead(ClientID::FromRandom());
gcs_actor_manager_->OnNodeDead(NodeID::FromRandom());
ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE);
// Remove worker and then check that the actor is dead.
@@ -388,7 +387,7 @@ TEST_F(GcsActorManagerTest, TestActorRestartWhenOwnerDead) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
WaitActorCreated(actor->GetActorID());
@@ -532,7 +531,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionWorkerFailure) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
auto worker_id = WorkerID::FromBinary(address.worker_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
@@ -578,7 +577,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNodeFailure) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
WaitActorCreated(actor->GetActorID());
@@ -624,7 +623,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNotHappendWhenReconstructed) {
// Check that the actor is in state `ALIVE`.
auto address = RandomAddress();
auto node_id = ClientID::FromBinary(address.raylet_id());
auto node_id = NodeID::FromBinary(address.raylet_id());
auto worker_id = WorkerID::FromBinary(address.worker_id());
actor->UpdateAddress(address);
gcs_actor_manager_->OnActorCreationSuccess(actor);
@@ -699,7 +698,7 @@ TEST_F(GcsActorManagerTest, TestRaceConditionCancelLease) {
// Check that the actor is in state `ALIVE`.
rpc::Address address;
auto node_id = ClientID::FromRandom();
auto node_id = NodeID::FromRandom();
auto worker_id = WorkerID::FromRandom();
address.set_raylet_id(node_id.Binary());
address.set_worker_id(worker_id.Binary());
@@ -742,7 +741,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeActorDependenciesResolved) {
auto job_id = JobID::FromInt(1);
auto registered_actor = RegisterActor(job_id);
const auto &owner_address = registered_actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
gcs_actor_manager_->OnWorkerDead(node_id, worker_id);
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
@@ -758,7 +757,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeDetachedActorDependenciesRes
auto job_id = JobID::FromInt(1);
auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true);
const auto &owner_address = registered_actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
gcs_actor_manager_->OnWorkerDead(node_id, worker_id);
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
@@ -774,7 +773,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeActorDependenciesResolved) {
auto job_id = JobID::FromInt(1);
auto registered_actor = RegisterActor(job_id);
const auto &owner_address = registered_actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
gcs_actor_manager_->OnNodeDead(node_id);
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
@@ -789,7 +788,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeDetachedActorDependenciesResol
auto job_id = JobID::FromInt(1);
auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true);
const auto &owner_address = registered_actor->GetOwnerAddress();
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
gcs_actor_manager_->OnNodeDead(node_id);
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
@@ -84,7 +84,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleFailedWithZeroNode) {
TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -103,7 +103,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
WorkerID worker_id = WorkerID::FromRandom();
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
node->node_manager_port(), worker_id,
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -119,7 +119,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -138,7 +138,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
// Mock an IOError reply, then the lease request will retry again.
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
node_id, ClientID::Nil(), Status::IOError("")));
node_id, NodeID::Nil(), Status::IOError("")));
ASSERT_EQ(1, gcs_actor_scheduler_->num_retry_leasing_count_);
ASSERT_EQ(2, raylet_client_->num_workers_requested);
ASSERT_EQ(1, raylet_client_->callbacks.size());
@@ -148,7 +148,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
WorkerID worker_id = WorkerID::FromRandom();
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
node->node_manager_port(), worker_id,
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -164,7 +164,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -183,7 +183,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
WorkerID worker_id = WorkerID::FromRandom();
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
node->node_manager_port(), worker_id,
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_creating_count_);
@@ -205,7 +205,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -232,7 +232,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
// Grant a worker, which will influence nothing.
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(1, raylet_client_->num_workers_requested);
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_);
@@ -243,7 +243,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -265,7 +265,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
// Grant a worker, which will influence nothing.
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(1, raylet_client_->num_workers_requested);
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_);
@@ -276,7 +276,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -294,7 +294,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
// Grant a worker, then the actor creation request should be sent to the worker.
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -318,7 +318,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -337,7 +337,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
auto worker_id = WorkerID::FromRandom();
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
node->node_manager_port(), worker_id,
node_id, ClientID::Nil()));
node_id, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -357,7 +357,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
TEST_F(GcsActorSchedulerTest, TestSpillback) {
auto node1 = Mocker::GenNodeInfo();
auto node_id_1 = ClientID::FromBinary(node1->node_id());
auto node_id_1 = NodeID::FromBinary(node1->node_id());
gcs_node_manager_->AddNode(node1);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -374,12 +374,12 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
// Add another node.
auto node2 = Mocker::GenNodeInfo();
auto node_id_2 = ClientID::FromBinary(node2->node_id());
auto node_id_2 = NodeID::FromBinary(node2->node_id());
gcs_node_manager_->AddNode(node2);
ASSERT_EQ(2, gcs_node_manager_->GetAllAliveNodes().size());
// Grant with an invalid spillback node, and schedule again.
auto invalid_node_id = ClientID::FromBinary(Mocker::GenNodeInfo()->node_id());
auto invalid_node_id = NodeID::FromBinary(Mocker::GenNodeInfo()->node_id());
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
node2->node_manager_address(), node2->node_manager_port(), WorkerID::Nil(),
node_id_1, invalid_node_id));
@@ -400,7 +400,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
WorkerID worker_id = WorkerID::FromRandom();
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node2->node_manager_address(),
node2->node_manager_port(), worker_id,
node_id_2, ClientID::Nil()));
node_id_2, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -417,7 +417,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
TEST_F(GcsActorSchedulerTest, TestReschedule) {
auto node1 = Mocker::GenNodeInfo();
auto node_id_1 = ClientID::FromBinary(node1->node_id());
auto node_id_1 = NodeID::FromBinary(node1->node_id());
gcs_node_manager_->AddNode(node1);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
@@ -452,7 +452,7 @@ TEST_F(GcsActorSchedulerTest, TestReschedule) {
// Grant a worker, then the actor creation request should be sent to the worker.
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node1->node_manager_address(),
node1->node_manager_port(), worker_id,
node_id_1, ClientID::Nil()));
node_id_1, NodeID::Nil()));
ASSERT_EQ(0, raylet_client_->callbacks.size());
ASSERT_EQ(1, worker_client_->callbacks.size());
@@ -470,12 +470,12 @@ TEST_F(GcsActorSchedulerTest, TestReleaseUnusedWorkers) {
// Add a node to the cluster.
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
gcs_node_manager_->AddNode(node);
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
// Send a `ReleaseUnusedWorkers` request to the node.
std::unordered_map<ClientID, std::vector<WorkerID>> node_to_workers;
std::unordered_map<NodeID, std::vector<WorkerID>> node_to_workers;
node_to_workers[node_id].push_back({WorkerID::FromRandom()});
gcs_actor_scheduler_->ReleaseUnusedWorkers(node_to_workers);
ASSERT_EQ(1, raylet_client_->num_release_unused_workers);
@@ -37,7 +37,7 @@ TEST_F(GcsNodeManagerTest, TestManagement) {
gcs_table_storage_);
// Test Add/Get/Remove functionality.
auto node = Mocker::GenNodeInfo();
auto node_id = ClientID::FromBinary(node->node_id());
auto node_id = NodeID::FromBinary(node->node_id());
node_manager.AddNode(node);
ASSERT_EQ(node, node_manager.GetNode(node_id));
@@ -67,7 +67,7 @@ TEST_F(GcsNodeManagerTest, TestListener) {
auto &alive_nodes = node_manager.GetAllAliveNodes();
ASSERT_EQ(added_nodes.size(), alive_nodes.size());
for (const auto &node : added_nodes) {
ASSERT_EQ(1, alive_nodes.count(ClientID::FromBinary(node->node_id())));
ASSERT_EQ(1, alive_nodes.count(NodeID::FromBinary(node->node_id())));
}
// Test AddNodeRemovedListener.
@@ -77,7 +77,7 @@ TEST_F(GcsNodeManagerTest, TestListener) {
removed_nodes.emplace_back(std::move(node));
});
for (int i = 0; i < node_count; ++i) {
node_manager.RemoveNode(ClientID::FromBinary(added_nodes[i]->node_id()));
node_manager.RemoveNode(NodeID::FromBinary(added_nodes[i]->node_id()));
}
ASSERT_EQ(node_count, removed_nodes.size());
ASSERT_TRUE(node_manager.GetAllAliveNodes().empty());
@@ -91,7 +91,7 @@ TEST_F(GcsNodeManagerTest, TestGetClusterRealtimeResources) {
gcs::GcsNodeManager node_manager(io_service, io_service, gcs_pub_sub_,
gcs_table_storage_);
auto node_id = ClientID::FromRandom();
auto node_id = NodeID::FromRandom();
rpc::HeartbeatTableData heartbeat;
const std::string cpu_resource = "CPU";
(*heartbeat.mutable_resources_available())[cpu_resource] = 10;
@@ -28,24 +28,24 @@ class MockedGcsObjectManager : public gcs::GcsObjectManager {
: gcs::GcsObjectManager(gcs_table_storage, gcs_pub_sub, gcs_node_manager) {}
public:
void AddObjectsLocation(const ClientID &node_id,
void AddObjectsLocation(const NodeID &node_id,
const absl::flat_hash_set<ObjectID> &object_ids) {
gcs::GcsObjectManager::AddObjectsLocation(node_id, object_ids);
}
void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) {
void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) {
gcs::GcsObjectManager::AddObjectLocationInCache(object_id, node_id);
}
absl::flat_hash_set<ClientID> GetObjectLocations(const ObjectID &object_id) {
absl::flat_hash_set<NodeID> GetObjectLocations(const ObjectID &object_id) {
return gcs::GcsObjectManager::GetObjectLocations(object_id);
}
void OnNodeRemoved(const ClientID &node_id) {
void OnNodeRemoved(const NodeID &node_id) {
gcs::GcsObjectManager::OnNodeRemoved(node_id);
}
void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) {
void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) {
gcs::GcsObjectManager::RemoveObjectLocationInCache(object_id, node_id);
}
};
@@ -67,12 +67,12 @@ class GcsObjectManagerTest : public ::testing::Test {
object_ids_.emplace(object_id);
}
for (size_t i = 0; i < node_count_; ++i) {
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
node_ids_.emplace(node_id);
}
}
void CheckLocations(const absl::flat_hash_set<ClientID> &locations) {
void CheckLocations(const absl::flat_hash_set<NodeID> &locations) {
ASSERT_EQ(locations.size(), node_ids_.size());
for (const auto &location : locations) {
auto it = node_ids_.find(location);
@@ -92,7 +92,7 @@ class GcsObjectManagerTest : public ::testing::Test {
size_t object_count_{5};
size_t node_count_{10};
absl::flat_hash_set<ObjectID> object_ids_;
absl::flat_hash_set<ClientID> node_ids_;
absl::flat_hash_set<NodeID> node_ids_;
};
TEST_F(GcsObjectManagerTest, AddObjectsLocationAndGetLocationTest) {
@@ -40,7 +40,7 @@ class MockPlacementGroupScheduler : public gcs::GcsPlacementGroupSchedulerInterf
MOCK_METHOD1(MarkScheduleCancelled, void(const PlacementGroupID &placement_group_id));
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
const ClientID &node_id) override {
const NodeID &node_id) override {
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> bundles;
bundles[group_on_dead_node_] = bundles_on_dead_node_;
return bundles;
@@ -306,8 +306,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
ASSERT_EQ(finished_placement_group_count, 0);
ASSERT_EQ(mock_placement_group_scheduler_->placement_groups_.size(), 1);
auto placement_group = mock_placement_group_scheduler_->placement_groups_.back();
placement_group->GetMutableBundle(0)->set_node_id(ClientID::FromRandom().Binary());
placement_group->GetMutableBundle(1)->set_node_id(ClientID::FromRandom().Binary());
placement_group->GetMutableBundle(0)->set_node_id(NodeID::FromRandom().Binary());
placement_group->GetMutableBundle(1)->set_node_id(NodeID::FromRandom().Binary());
mock_placement_group_scheduler_->placement_groups_.pop_back();
// If a node dies, we will set the bundles above it to be unplaced and reschedule the
@@ -316,7 +316,7 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
mock_placement_group_scheduler_->group_on_dead_node_ =
placement_group->GetPlacementGroupID();
mock_placement_group_scheduler_->bundles_on_dead_node_.push_back(0);
gcs_placement_group_manager_->OnNodeDead(ClientID::FromRandom());
gcs_placement_group_manager_->OnNodeDead(NodeID::FromRandom());
// Trigger scheduling `RESCHEDULING` placement group.
auto finished_group = std::make_shared<gcs::GcsPlacementGroup>(
@@ -328,8 +328,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
placement_group->GetPlacementGroupID());
const auto &bundles =
mock_placement_group_scheduler_->placement_groups_[0]->GetBundles();
EXPECT_TRUE(ClientID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil());
EXPECT_FALSE(ClientID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil());
EXPECT_TRUE(NodeID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil());
EXPECT_FALSE(NodeID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil());
// If `RESCHEDULING` placement group fails to create, we will schedule it again first.
placement_group = mock_placement_group_scheduler_->placement_groups_.back();
@@ -63,7 +63,7 @@ class GcsPlacementGroupSchedulerTest : public ::testing::Test {
gcs_node_manager_->AddNode(node);
rpc::HeartbeatTableData heartbeat;
(*heartbeat.mutable_resources_available())["CPU"] = cpu_num;
gcs_node_manager_->UpdateNodeRealtimeResources(ClientID::FromBinary(node->node_id()),
gcs_node_manager_->UpdateNodeRealtimeResources(NodeID::FromBinary(node->node_id()),
heartbeat);
}
@@ -489,10 +489,10 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestRescheduleWhenNodeDead) {
WaitPendingDone(success_placement_groups_, 1);
auto bundles_on_node0 =
scheduler_->GetBundlesOnNode(ClientID::FromBinary(node0->node_id()));
scheduler_->GetBundlesOnNode(NodeID::FromBinary(node0->node_id()));
ASSERT_EQ(1, bundles_on_node0.size());
auto bundles_on_node1 =
scheduler_->GetBundlesOnNode(ClientID::FromBinary(node1->node_id()));
scheduler_->GetBundlesOnNode(NodeID::FromBinary(node1->node_id()));
ASSERT_EQ(1, bundles_on_node1.size());
// One node is dead, reschedule the placement group.
auto bundle_on_dead_node = placement_group->GetMutableBundle(0);
@@ -543,8 +543,8 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestStrictSpreadStrategyResourceCheck) {
TEST_F(GcsPlacementGroupSchedulerTest, TestBundleLocationIndex) {
gcs::BundleLocationIndex bundle_location_index;
/// Generate data.
const auto node1 = ClientID::FromRandom();
const auto node2 = ClientID::FromRandom();
const auto node1 = NodeID::FromRandom();
const auto node2 = NodeID::FromRandom();
rpc::CreatePlacementGroupRequest request_pg1 =
Mocker::GenCreatePlacementGroupRequest("pg1");
const auto pg1_id = PlacementGroupID::FromBinary(
@@ -641,7 +641,7 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestNodeDeadDuringCommitResources) {
scheduler_->ScheduleUnplacedBundles(placement_group, failure_handler, success_handler);
ASSERT_TRUE(raylet_clients_[0]->GrantPrepareBundleResources());
gcs_node_manager_->RemoveNode(ClientID::FromBinary(node1->node_id()));
gcs_node_manager_->RemoveNode(NodeID::FromBinary(node1->node_id()));
ASSERT_TRUE(raylet_clients_[1]->GrantPrepareBundleResources());
WaitPendingDone(failure_placement_groups_, 1);
}
@@ -671,8 +671,8 @@ TEST_F(GcsServerTest, TestNodeInfo) {
TEST_F(GcsServerTest, TestObjectInfo) {
// Create object table data
ObjectID object_id = ObjectID::FromRandom();
ClientID node1_id = ClientID::FromRandom();
ClientID node2_id = ClientID::FromRandom();
NodeID node1_id = NodeID::FromRandom();
NodeID node2_id = NodeID::FromRandom();
// Add object location
rpc::AddObjectLocationRequest add_object_location_request;
@@ -720,7 +720,7 @@ TEST_F(GcsServerTest, TestTaskInfo) {
ASSERT_TRUE(!result.has_task());
// Add task lease
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
auto task_lease_data = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
rpc::AddTaskLeaseRequest add_task_lease_request;
add_task_lease_request.mutable_task_lease_data()->CopyFrom(*task_lease_data);
@@ -739,7 +739,7 @@ TEST_F(GcsServerTest, TestTaskInfo) {
TEST_F(GcsServerTest, TestStats) {
rpc::ProfileTableData profile_table_data;
profile_table_data.set_component_id(ClientID::FromRandom().Binary());
profile_table_data.set_component_id(NodeID::FromRandom().Binary());
rpc::AddProfileDataRequest add_profile_data_request;
add_profile_data_request.mutable_profile_data()->CopyFrom(profile_table_data);
ASSERT_TRUE(AddProfileData(add_profile_data_request));
@@ -89,12 +89,12 @@ struct GcsServerMocker {
}
bool GrantWorkerLease() {
return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, ClientID::Nil());
return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, NodeID::Nil());
}
// Trigger reply to RequestWorkerLease.
bool GrantWorkerLease(const std::string &address, int port, const WorkerID &worker_id,
const ClientID &raylet_id, const ClientID &retry_at_raylet_id,
const NodeID &raylet_id, const NodeID &retry_at_raylet_id,
Status status = Status::OK()) {
rpc::RequestWorkerLeaseReply reply;
if (!retry_at_raylet_id.IsNil()) {
@@ -150,7 +150,7 @@ struct GcsServerMocker {
int num_workers_disconnected = 0;
int num_leases_canceled = 0;
int num_release_unused_workers = 0;
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
std::list<rpc::ClientCallback<rpc::RequestWorkerLeaseReply>> callbacks = {};
std::list<rpc::ClientCallback<rpc::CancelWorkerLeaseReply>> cancel_callbacks = {};
std::list<rpc::ClientCallback<rpc::ReleaseUnusedWorkersReply>> release_callbacks = {};
@@ -217,7 +217,7 @@ struct GcsServerMocker {
int num_lease_requested = 0;
int num_return_requested = 0;
int num_commit_requested = 0;
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
std::list<rpc::ClientCallback<rpc::PrepareBundleResourcesReply>> lease_callbacks = {};
std::list<rpc::ClientCallback<rpc::CancelResourceReserveReply>> return_callbacks = {};
};
@@ -289,8 +289,8 @@ struct GcsServerMocker {
Status UnregisterSelf() override { return Status::NotImplemented(""); }
const ClientID &GetSelfId() const override {
static ClientID node_id;
const NodeID &GetSelfId() const override {
static NodeID node_id;
return node_id;
}
@@ -304,7 +304,7 @@ struct GcsServerMocker {
return Status::NotImplemented("");
}
Status AsyncUnregister(const ClientID &node_id,
Status AsyncUnregister(const NodeID &node_id,
const gcs::StatusCallback &callback) override {
if (callback) {
callback(Status::OK());
@@ -321,34 +321,34 @@ struct GcsServerMocker {
}
Status AsyncSubscribeToNodeChange(
const gcs::SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe,
const gcs::SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
const gcs::StatusCallback &done) override {
return Status::NotImplemented("");
}
boost::optional<rpc::GcsNodeInfo> Get(const ClientID &node_id) const override {
boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id) const override {
return boost::none;
}
const std::unordered_map<ClientID, rpc::GcsNodeInfo> &GetAll() const override {
static std::unordered_map<ClientID, rpc::GcsNodeInfo> node_info_list;
const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const override {
static std::unordered_map<NodeID, rpc::GcsNodeInfo> node_info_list;
return node_info_list;
}
bool IsRemoved(const ClientID &node_id) const override { return false; }
bool IsRemoved(const NodeID &node_id) const override { return false; }
Status AsyncGetResources(
const ClientID &node_id,
const NodeID &node_id,
const gcs::OptionalItemCallback<ResourceMap> &callback) override {
return Status::NotImplemented("");
}
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
const gcs::StatusCallback &callback) override {
return Status::NotImplemented("");
}
Status AsyncDeleteResources(const ClientID &node_id,
Status AsyncDeleteResources(const NodeID &node_id,
const std::vector<std::string> &resource_names,
const gcs::StatusCallback &callback) override {
return Status::NotImplemented("");
@@ -366,7 +366,7 @@ struct GcsServerMocker {
}
Status AsyncSubscribeHeartbeat(
const gcs::SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
const gcs::SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
const gcs::StatusCallback &done) override {
return Status::NotImplemented("");
}
+2 -2
View File
@@ -85,7 +85,7 @@ inline std::shared_ptr<ray::rpc::ActorTableData> CreateActorTableData(
/// Helper function to produce worker failure data.
inline std::shared_ptr<ray::rpc::WorkerTableData> CreateWorkerFailureData(
const ClientID &raylet_id, const WorkerID &worker_id, const std::string &address,
const NodeID &raylet_id, const WorkerID &worker_id, const std::string &address,
int32_t port, int64_t timestamp = std::time(nullptr),
bool intentional_disconnect = false) {
auto worker_failure_info_ptr = std::make_shared<ray::rpc::WorkerTableData>();
@@ -104,7 +104,7 @@ inline std::shared_ptr<ray::rpc::WorkerTableData> CreateWorkerFailureData(
/// \param is_add Whether the object has appeared on the node.
/// \return The object location change created by this method.
inline std::shared_ptr<ray::rpc::ObjectLocationChange> CreateObjectLocationChange(
const ClientID &node_id, bool is_add) {
const NodeID &node_id, bool is_add) {
ray::rpc::ObjectTableData object_table_data;
object_table_data.set_manager(node_id.Binary());
auto object_location_change = std::make_shared<ray::rpc::ObjectLocationChange>();
+34 -37
View File
@@ -139,8 +139,7 @@ Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll(
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
return log_based_actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe,
done);
return log_based_actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
Status RedisLogBasedActorInfoAccessor::AsyncSubscribe(
@@ -316,7 +315,7 @@ Status RedisActorInfoAccessor::AsyncSubscribeAll(
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
return actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
return actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
Status RedisActorInfoAccessor::AsyncSubscribe(
@@ -361,7 +360,7 @@ Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr<JobTableData> &
Status RedisJobInfoAccessor::AsyncSubscribeAll(
const SubscribeCallback<JobID, JobTableData> &subscribe, const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
return job_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
return job_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl)
@@ -506,7 +505,7 @@ Status RedisObjectInfoAccessor::AsyncGetLocations(
}
Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
const ClientID &node_id,
const NodeID &node_id,
const StatusCallback &callback) {
std::function<void(RedisGcsClient * client, const ObjectID &id,
const ObjectTableData &data)>
@@ -524,7 +523,7 @@ Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
}
Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id,
const ClientID &node_id,
const NodeID &node_id,
const StatusCallback &callback) {
std::function<void(RedisGcsClient * client, const ObjectID &id,
const ObjectTableData &data)>
@@ -569,7 +568,7 @@ Status RedisNodeInfoAccessor::UnregisterSelf() {
return client_table.Disconnect();
}
const ClientID &RedisNodeInfoAccessor::GetSelfId() const {
const NodeID &RedisNodeInfoAccessor::GetSelfId() const {
ClientTable &client_table = client_impl_->client_table();
return client_table.GetLocalClientId();
}
@@ -583,18 +582,18 @@ Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info,
const StatusCallback &callback) {
ClientTable::WriteCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &id,
on_done = [callback](RedisGcsClient *client, const NodeID &id,
const GcsNodeInfo &data) { callback(Status::OK()); };
}
ClientTable &client_table = client_impl_->client_table();
return client_table.MarkConnected(node_info, on_done);
}
Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
const StatusCallback &callback) {
ClientTable::WriteCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &id,
on_done = [callback](RedisGcsClient *client, const NodeID &id,
const GcsNodeInfo &data) { callback(Status::OK()); };
}
ClientTable &client_table = client_impl_->client_table();
@@ -602,8 +601,7 @@ Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
}
Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const StatusCallback &done) {
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
ClientTable &client_table = client_impl_->client_table();
return client_table.SubscribeToNodeChange(subscribe, done);
@@ -612,7 +610,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
Status RedisNodeInfoAccessor::AsyncGetAll(
const MultiItemCallback<GcsNodeInfo> &callback) {
RAY_CHECK(callback != nullptr);
auto on_done = [callback](RedisGcsClient *client, const ClientID &id,
auto on_done = [callback](RedisGcsClient *client, const NodeID &id,
const std::vector<GcsNodeInfo> &data) {
std::vector<GcsNodeInfo> result;
std::set<std::string> node_ids;
@@ -627,7 +625,7 @@ Status RedisNodeInfoAccessor::AsyncGetAll(
return client_table.Lookup(on_done);
}
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const ClientID &node_id) const {
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const NodeID &node_id) const {
GcsNodeInfo node_info;
ClientTable &client_table = client_impl_->client_table();
bool found = client_table.GetClient(node_id, &node_info);
@@ -638,12 +636,12 @@ boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const ClientID &node_id)
return optional_node;
}
const std::unordered_map<ClientID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
const std::unordered_map<NodeID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
ClientTable &client_table = client_impl_->client_table();
return client_table.GetAllClients();
}
bool RedisNodeInfoAccessor::IsRemoved(const ClientID &node_id) const {
bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
ClientTable &client_table = client_impl_->client_table();
return client_table.IsRemoved(node_id);
}
@@ -651,11 +649,11 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
const std::shared_ptr<HeartbeatTableData> &data_ptr, const StatusCallback &callback) {
HeartbeatTable::WriteCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
const HeartbeatTableData &data) { callback(Status::OK()); };
}
ClientID node_id = ClientID::FromBinary(data_ptr->client_id());
NodeID node_id = NodeID::FromBinary(data_ptr->client_id());
HeartbeatTable &heartbeat_table = client_impl_->heartbeat_table();
return heartbeat_table.Add(JobID::Nil(), node_id, data_ptr, on_done);
}
@@ -663,15 +661,14 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
void RedisNodeInfoAccessor::AsyncReReportHeartbeat() {}
Status RedisNodeInfoAccessor::AsyncSubscribeHeartbeat(
const SubscribeCallback<ClientID, HeartbeatTableData> &subscribe,
const SubscribeCallback<NodeID, HeartbeatTableData> &subscribe,
const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
auto on_subscribe = [subscribe](const ClientID &node_id,
const HeartbeatTableData &data) {
auto on_subscribe = [subscribe](const NodeID &node_id, const HeartbeatTableData &data) {
subscribe(node_id, data);
};
return heartbeat_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done);
return heartbeat_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done);
}
Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat(
@@ -679,30 +676,30 @@ Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat(
const StatusCallback &callback) {
HeartbeatBatchTable::WriteCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
const HeartbeatBatchTableData &data) { callback(Status::OK()); };
}
HeartbeatBatchTable &hb_batch_table = client_impl_->heartbeat_batch_table();
return hb_batch_table.Add(JobID::Nil(), ClientID::Nil(), data_ptr, on_done);
return hb_batch_table.Add(JobID::Nil(), NodeID::Nil(), data_ptr, on_done);
}
Status RedisNodeInfoAccessor::AsyncSubscribeBatchHeartbeat(
const ItemCallback<HeartbeatBatchTableData> &subscribe, const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
auto on_subscribe = [subscribe](const ClientID &node_id,
auto on_subscribe = [subscribe](const NodeID &node_id,
const HeartbeatBatchTableData &data) {
subscribe(data);
};
return heartbeat_batch_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe,
return heartbeat_batch_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe,
done);
}
Status RedisNodeInfoAccessor::AsyncGetResources(
const ClientID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
RAY_CHECK(callback != nullptr);
auto on_done = [callback](RedisGcsClient *client, const ClientID &id,
auto on_done = [callback](RedisGcsClient *client, const NodeID &id,
const ResourceMap &data) {
boost::optional<ResourceMap> result;
if (!data.empty()) {
@@ -715,12 +712,12 @@ Status RedisNodeInfoAccessor::AsyncGetResources(
return resource_table.Lookup(JobID::Nil(), node_id, on_done);
}
Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id,
Status RedisNodeInfoAccessor::AsyncUpdateResources(const NodeID &node_id,
const ResourceMap &resources,
const StatusCallback &callback) {
Hash<ClientID, ResourceTableData>::HashCallback on_done = nullptr;
Hash<NodeID, ResourceTableData>::HashCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
const ResourceMap &resources) { callback(Status::OK()); };
}
@@ -729,11 +726,11 @@ Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id,
}
Status RedisNodeInfoAccessor::AsyncDeleteResources(
const ClientID &node_id, const std::vector<std::string> &resource_names,
const NodeID &node_id, const std::vector<std::string> &resource_names,
const StatusCallback &callback) {
Hash<ClientID, ResourceTableData>::HashRemoveCallback on_done = nullptr;
Hash<NodeID, ResourceTableData>::HashRemoveCallback on_done = nullptr;
if (callback != nullptr) {
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
const std::vector<std::string> &resource_names) {
callback(Status::OK());
};
@@ -746,7 +743,7 @@ Status RedisNodeInfoAccessor::AsyncDeleteResources(
Status RedisNodeInfoAccessor::AsyncSubscribeToResources(
const ItemCallback<rpc::NodeResourceChange> &subscribe, const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
auto on_subscribe = [subscribe](const ClientID &id,
auto on_subscribe = [subscribe](const NodeID &id,
const ResourceChangeNotification &result) {
rpc::NodeResourceChange node_resource_change;
node_resource_change.set_node_id(id.Binary());
@@ -762,7 +759,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToResources(
}
subscribe(node_resource_change);
};
return resource_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done);
return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done);
}
RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient *client_impl) {}
@@ -795,7 +792,7 @@ Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures(
const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
const StatusCallback &done) {
RAY_CHECK(subscribe != nullptr);
return worker_failure_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
return worker_failure_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure(
+30 -31
View File
@@ -103,13 +103,13 @@ class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor {
protected:
RedisGcsClient *client_impl_{nullptr};
// Use a random ClientID for actor subscription. Because:
// If we use ClientID::Nil, GCS will still send all actors' updates to this GCS Client.
// Use a random NodeID for actor subscription. Because:
// If we use NodeID::Nil, GCS will still send all actors' updates to this GCS Client.
// Even though we could filter out irrelevant updates, there would be extra overhead.
// And because the new GCS Client will no longer hold the local ClientID, so we use
// random ClientID instead.
// Also, because the new GCS Client will no longer hold the local NodeID, we use a
// random NodeID instead.
// TODO(micafan): Remove this random id, once GCS becomes a service.
ClientID subscribe_id_{ClientID::FromRandom()};
NodeID subscribe_id_{NodeID::FromRandom()};
private:
typedef SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>
@@ -246,13 +246,13 @@ class RedisTaskInfoAccessor : public TaskInfoAccessor {
private:
RedisGcsClient *client_impl_{nullptr};
// Use a random ClientID for task subscription. Because:
// If we use ClientID::Nil, GCS will still send all tasks' updates to this GCS Client.
// Use a random NodeID for task subscription. Because:
// If we use NodeID::Nil, GCS will still send all tasks' updates to this GCS Client.
// Even though we could filter out irrelevant updates, there would be extra overhead.
// And because the new GCS Client will no longer hold the local ClientID, so we use
// random ClientID instead.
// Also, because the new GCS Client will no longer hold the local NodeID, we use a
// random NodeID instead.
// TODO(micafan): Remove this random id, once GCS becomes a service.
ClientID subscribe_id_{ClientID::FromRandom()};
NodeID subscribe_id_{NodeID::FromRandom()};
typedef SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>
TaskSubscriptionExecutor;
@@ -280,10 +280,10 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor {
return Status::NotImplemented("AsyncGetAll not implemented");
}
Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) override;
Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
const StatusCallback &callback) override;
Status AsyncSubscribeToLocations(
@@ -298,13 +298,13 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor {
private:
RedisGcsClient *client_impl_{nullptr};
// Use a random ClientID for object subscription. Because:
// If we use ClientID::Nil, GCS will still send all objects' updates to this GCS Client.
// Use a random NodeID for object subscription. Because:
// If we use NodeID::Nil, GCS will still send all objects' updates to this GCS Client.
// Even though we could filter out irrelevant updates, there would be extra overhead.
// And because the new GCS Client will no longer hold the local ClientID, so we use
// random ClientID instead.
// Also, because the new GCS Client will no longer hold the local NodeID, we use a
// random NodeID instead.
// TODO(micafan): Remove this random id, once GCS becomes a service.
ClientID subscribe_id_{ClientID::FromRandom()};
NodeID subscribe_id_{NodeID::FromRandom()};
typedef SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>
ObjectSubscriptionExecutor;
@@ -324,35 +324,34 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
Status UnregisterSelf() override;
const ClientID &GetSelfId() const override;
const NodeID &GetSelfId() const override;
const GcsNodeInfo &GetSelfInfo() const override;
Status AsyncRegister(const GcsNodeInfo &node_info,
const StatusCallback &callback) override;
Status AsyncUnregister(const ClientID &node_id,
const StatusCallback &callback) override;
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
Status AsyncSubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
const StatusCallback &done) override;
boost::optional<GcsNodeInfo> Get(const ClientID &node_id) const override;
boost::optional<GcsNodeInfo> Get(const NodeID &node_id) const override;
const std::unordered_map<ClientID, GcsNodeInfo> &GetAll() const override;
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
bool IsRemoved(const ClientID &node_id) const override;
bool IsRemoved(const NodeID &node_id) const override;
Status AsyncGetResources(const ClientID &node_id,
Status AsyncGetResources(const NodeID &node_id,
const OptionalItemCallback<ResourceMap> &callback) override;
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
const StatusCallback &callback) override;
Status AsyncDeleteResources(const ClientID &node_id,
Status AsyncDeleteResources(const NodeID &node_id,
const std::vector<std::string> &resource_names,
const StatusCallback &callback) override;
@@ -365,7 +364,7 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
void AsyncReReportHeartbeat() override;
Status AsyncSubscribeHeartbeat(
const SubscribeCallback<ClientID, HeartbeatTableData> &subscribe,
const SubscribeCallback<NodeID, HeartbeatTableData> &subscribe,
const StatusCallback &done) override;
Status AsyncReportBatchHeartbeat(
@@ -392,15 +391,15 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
private:
RedisGcsClient *client_impl_{nullptr};
typedef SubscriptionExecutor<ClientID, ResourceChangeNotification, DynamicResourceTable>
typedef SubscriptionExecutor<NodeID, ResourceChangeNotification, DynamicResourceTable>
DynamicResourceSubscriptionExecutor;
DynamicResourceSubscriptionExecutor resource_sub_executor_;
typedef SubscriptionExecutor<ClientID, HeartbeatTableData, HeartbeatTable>
typedef SubscriptionExecutor<NodeID, HeartbeatTableData, HeartbeatTable>
HeartbeatSubscriptionExecutor;
HeartbeatSubscriptionExecutor heartbeat_sub_executor_;
typedef SubscriptionExecutor<ClientID, HeartbeatBatchTableData, HeartbeatBatchTable>
typedef SubscriptionExecutor<NodeID, HeartbeatBatchTableData, HeartbeatBatchTable>
HeartbeatBatchSubscriptionExecutor;
HeartbeatBatchSubscriptionExecutor heartbeat_batch_sub_executor_;
};
+1 -1
View File
@@ -381,7 +381,7 @@ Status RedisContext::RunArgvAsync(const std::vector<std::string> &args,
return status;
}
Status RedisContext::SubscribeAsync(const ClientID &client_id,
Status RedisContext::SubscribeAsync(const NodeID &client_id,
const TablePubsub pubsub_channel,
const RedisCallback &redisCallback,
int64_t *out_callback_index) {
+1 -1
View File
@@ -236,7 +236,7 @@ class RedisContext {
/// \param redisCallback The callback function that the notification calls.
/// \param out_callback_index The output pointer to callback index.
/// \return Status.
Status SubscribeAsync(const ClientID &client_id, const TablePubsub pubsub_channel,
Status SubscribeAsync(const NodeID &client_id, const TablePubsub pubsub_channel,
const RedisCallback &redisCallback, int64_t *out_callback_index);
/// Subscribes the client to the given pattern.
+2 -2
View File
@@ -48,8 +48,8 @@ Status RedisGcsClient::Connect(boost::asio::io_service &io_service) {
log_based_actor_table_.reset(new LogBasedActorTable({primary_context}, this));
actor_table_.reset(new ActorTable({primary_context}, this));
// TODO(micafan) Modify ClientTable' Constructor(remove ClientID) in future.
// We will use NodeID instead of ClientID.
// TODO(micafan) Modify ClientTable's constructor (remove NodeID) in future.
// Note: the former ClientID type has been renamed to NodeID.
// For worker/driver, it might not have this field(NodeID).
// For raylet, NodeID should be initialized in raylet layer(not here).
client_table_.reset(new ClientTable({primary_context}, this));
+7 -8
View File
@@ -20,7 +20,7 @@ namespace gcs {
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
const ClientID &client_id, const SubscribeCallback<ID, Data> &subscribe,
const NodeID &client_id, const SubscribeCallback<ID, Data> &subscribe,
const StatusCallback &done) {
// TODO(micafan) Optimize the lock when necessary.
// Consider avoiding locking in single-threaded processes.
@@ -110,9 +110,9 @@ Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
const ClientID &client_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
const NodeID &client_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
const StatusCallback &done) {
RAY_CHECK(client_id != ClientID::Nil());
RAY_CHECK(client_id != NodeID::Nil());
// NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts,
// thus we need to call `RequestNotifications` in the Subscribe callback to ensure
@@ -160,7 +160,7 @@ Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncUnsubscribe(
const ClientID &client_id, const ID &id, const StatusCallback &done) {
const NodeID &client_id, const ID &id, const StatusCallback &done) {
SubscribeCallback<ID, Data> subscribe = nullptr;
{
std::unique_lock<std::mutex> lock(mutex_);
@@ -205,11 +205,10 @@ template class SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>;
template class SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>;
template class SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>,
TaskLeaseTable>;
template class SubscriptionExecutor<ClientID, ResourceChangeNotification,
template class SubscriptionExecutor<NodeID, ResourceChangeNotification,
DynamicResourceTable>;
template class SubscriptionExecutor<ClientID, HeartbeatTableData, HeartbeatTable>;
template class SubscriptionExecutor<ClientID, HeartbeatBatchTableData,
HeartbeatBatchTable>;
template class SubscriptionExecutor<NodeID, HeartbeatTableData, HeartbeatTable>;
template class SubscriptionExecutor<NodeID, HeartbeatBatchTableData, HeartbeatBatchTable>;
template class SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>;
} // namespace gcs
+3 -3
View File
@@ -46,7 +46,7 @@ class SubscriptionExecutor {
/// is registered or updated.
/// \param done Callback that will be called when subscription is complete.
/// \return Status
Status AsyncSubscribeAll(const ClientID &client_id,
Status AsyncSubscribeAll(const NodeID &client_id,
const SubscribeCallback<ID, Data> &subscribe,
const StatusCallback &done);
@@ -61,7 +61,7 @@ class SubscriptionExecutor {
/// is registered or updated.
/// \param done Callback that will be called when subscription is complete.
/// \return Status
Status AsyncSubscribe(const ClientID &client_id, const ID &id,
Status AsyncSubscribe(const NodeID &client_id, const ID &id,
const SubscribeCallback<ID, Data> &subscribe,
const StatusCallback &done);
@@ -74,7 +74,7 @@ class SubscriptionExecutor {
/// \param id The id of the element to be unsubscribed to.
/// \param done Callback that will be called when cancel subscription is complete.
/// \return Status
Status AsyncUnsubscribe(const ClientID &client_id, const ID &id,
Status AsyncUnsubscribe(const NodeID &client_id, const ID &id,
const StatusCallback &done);
private:
+25 -26
View File
@@ -137,7 +137,7 @@ Status Log<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &
}
template <typename ID, typename Data>
Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe,
const SubscriptionCallback &done) {
auto subscribe_wrapper = [subscribe](RedisGcsClient *client, const ID &id,
@@ -150,7 +150,7 @@ Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
}
template <typename ID, typename Data>
Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const NotificationCallback &subscribe,
const SubscriptionCallback &done) {
RAY_CHECK(subscribe_callback_index_ == -1)
@@ -192,7 +192,7 @@ Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
template <typename ID, typename Data>
Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
const ClientID &client_id,
const NodeID &client_id,
const StatusCallback &done) {
RAY_CHECK(subscribe_callback_index_ >= 0)
<< "Client requested notifications on a key before Subscribe completed";
@@ -214,7 +214,7 @@ Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
template <typename ID, typename Data>
Status Log<ID, Data>::CancelNotifications(const JobID &job_id, const ID &id,
const ClientID &client_id,
const NodeID &client_id,
const StatusCallback &done) {
RAY_CHECK(subscribe_callback_index_ >= 0)
<< "Client canceled notifications on a key before Subscribe completed";
@@ -315,7 +315,7 @@ Status Table<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback
}
template <typename ID, typename Data>
Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe,
const FailureCallback &failure,
const SubscriptionCallback &done) {
@@ -336,7 +336,7 @@ Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id
}
template <typename ID, typename Data>
Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe,
const SubscriptionCallback &done) {
return Subscribe(job_id, client_id, subscribe, /*failure*/ nullptr, done);
@@ -379,7 +379,7 @@ Status Set<ID, Data>::Remove(const JobID &job_id, const ID &id,
}
template <typename ID, typename Data>
Status Set<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Set<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const NotificationCallback &subscribe,
const SubscriptionCallback &done) {
auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id,
@@ -481,7 +481,7 @@ Status Hash<ID, Data>::Lookup(const JobID &job_id, const ID &id,
}
template <typename ID, typename Data>
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
const HashNotificationCallback &subscribe,
const SubscriptionCallback &done) {
RAY_CHECK(subscribe_callback_index_ == -1)
@@ -551,7 +551,7 @@ void ClientTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback)
void ClientTable::HandleNotification(RedisGcsClient *client,
const GcsNodeInfo &node_info) {
ClientID node_id = ClientID::FromBinary(node_info.node_id());
NodeID node_id = NodeID::FromBinary(node_info.node_id());
bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
// It's possible to get duplicate notifications from the client table, so
// check whether this notification is new.
@@ -598,14 +598,14 @@ void ClientTable::HandleNotification(RedisGcsClient *client,
}
}
// Returns a const reference to `local_node_id_`, after RAY_CHECK-ing that it is
// not nil. `local_node_id_` is assigned in Connect() once the local node info
// has been appended successfully.
const ClientID &ClientTable::GetLocalClientId() const {
const NodeID &ClientTable::GetLocalClientId() const {
RAY_CHECK(!local_node_id_.IsNil());
return local_node_id_;
}
const GcsNodeInfo &ClientTable::GetLocalClient() const { return local_node_info_; }
// Returns true when `node_id` is present in the `removed_nodes_` set.
bool ClientTable::IsRemoved(const ClientID &node_id) const {
bool ClientTable::IsRemoved(const NodeID &node_id) const {
return removed_nodes_.count(node_id) == 1;
}
@@ -617,7 +617,7 @@ Status ClientTable::Connect(const GcsNodeInfo &local_node_info) {
auto node_info_ptr = std::make_shared<GcsNodeInfo>(local_node_info);
Status status = SyncAppend(JobID::Nil(), client_log_key_, node_info_ptr);
if (status.ok()) {
local_node_id_ = ClientID::FromBinary(local_node_info.node_id());
local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
local_node_info_ = local_node_info;
}
return status;
@@ -642,7 +642,7 @@ ray::Status ClientTable::MarkConnected(const GcsNodeInfo &node_info,
return Append(JobID::Nil(), client_log_key_, node_info_ptr, done);
}
ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id,
ray::Status ClientTable::MarkDisconnected(const NodeID &dead_node_id,
const WriteCallback &done) {
auto node_info = std::make_shared<GcsNodeInfo>();
node_info->set_node_id(dead_node_id.Binary());
@@ -651,8 +651,7 @@ ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id,
}
ray::Status ClientTable::SubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const StatusCallback &done) {
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
// Callback for a notification from the client table.
auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key,
const std::vector<GcsNodeInfo> &notifications) {
@@ -699,7 +698,7 @@ ray::Status ClientTable::SubscribeToNodeChange(
return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done);
}
bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) const {
bool ClientTable::GetClient(const NodeID &node_id, GcsNodeInfo *node_info) const {
RAY_CHECK(!node_id.IsNil());
auto entry = node_cache_.find(node_id);
auto found = (entry != node_cache_.end());
@@ -709,7 +708,7 @@ bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) con
return found;
}
// Returns the node cache (node ID -> GcsNodeInfo) by const reference; the map
// is not copied.
const std::unordered_map<ClientID, GcsNodeInfo> &ClientTable::GetAllClients() const {
const std::unordered_map<NodeID, GcsNodeInfo> &ClientTable::GetAllClients() const {
return node_cache_;
}
@@ -720,13 +719,13 @@ Status ClientTable::Lookup(const Callback &lookup) {
// Builds a human-readable summary: the base Log's debug string, followed by the
// number of entries in the node cache and the number of removed nodes.
std::string ClientTable::DebugString() const {
std::stringstream result;
result << Log<ClientID, GcsNodeInfo>::DebugString();
result << Log<NodeID, GcsNodeInfo>::DebugString();
result << ", cache size: " << node_cache_.size()
<< ", num removed: " << removed_nodes_.size();
return result.str();
}
Status TaskLeaseTable::Subscribe(const JobID &job_id, const ClientID &client_id,
Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe,
const SubscriptionCallback &done) {
auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id,
@@ -865,21 +864,21 @@ template class Table<TaskID, TaskTableData>;
template class Log<ActorID, ActorTableData>;
template class Log<TaskID, TaskReconstructionData>;
template class Table<TaskID, TaskLeaseData>;
template class Table<ClientID, HeartbeatTableData>;
template class Table<ClientID, HeartbeatBatchTableData>;
template class Log<ClientID, GcsNodeInfo>;
template class Table<NodeID, HeartbeatTableData>;
template class Table<NodeID, HeartbeatBatchTableData>;
template class Log<NodeID, GcsNodeInfo>;
template class Log<JobID, JobTableData>;
template class Log<UniqueID, ProfileTableData>;
template class Log<ClientID, HeartbeatTableData>;
template class Log<ClientID, HeartbeatBatchTableData>;
template class Log<NodeID, HeartbeatTableData>;
template class Log<NodeID, HeartbeatBatchTableData>;
template class Log<WorkerID, WorkerTableData>;
template class Table<ActorCheckpointID, ActorCheckpointData>;
template class Table<ActorID, ActorCheckpointIdData>;
template class Table<WorkerID, WorkerTableData>;
template class Table<ActorID, ActorTableData>;
template class Log<ClientID, ResourceTableData>;
template class Hash<ClientID, ResourceTableData>;
template class Log<NodeID, ResourceTableData>;
template class Hash<NodeID, ResourceTableData>;
} // namespace gcs
+30 -30
View File
@@ -71,10 +71,10 @@ template <typename ID>
class PubsubInterface {
public:
virtual Status RequestNotifications(const JobID &job_id, const ID &id,
const ClientID &client_id,
const NodeID &client_id,
const StatusCallback &done) = 0;
virtual Status CancelNotifications(const JobID &job_id, const ID &id,
const ClientID &client_id,
const NodeID &client_id,
const StatusCallback &done) = 0;
virtual ~PubsubInterface(){};
};
@@ -195,7 +195,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe, const SubscriptionCallback &done);
/// Request notifications about a key in this table.
@@ -214,8 +214,8 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
/// notifications can be requested, a call to `Subscribe` to this
/// table with the same `client_id` must complete successfully.
/// \return Status
Status RequestNotifications(const JobID &job_id, const ID &id,
const ClientID &client_id, const StatusCallback &done);
Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &client_id,
const StatusCallback &done);
/// Cancel notifications about a key in this table.
///
@@ -224,7 +224,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
/// \param client_id The client who originally requested notifications.
/// \param done Callback that is called when cancel notifications is complete.
/// \return Status
Status CancelNotifications(const JobID &job_id, const ID &id, const ClientID &client_id,
Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &client_id,
const StatusCallback &done);
/// Subscribe to any modifications to the key. The caller may choose
@@ -245,7 +245,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const NotificationCallback &subscribe,
const SubscriptionCallback &done);
@@ -380,7 +380,7 @@ class Table : private Log<ID, Data>,
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe, const FailureCallback &failure,
const SubscriptionCallback &done);
@@ -399,7 +399,7 @@ class Table : private Log<ID, Data>,
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe, const SubscriptionCallback &done);
void Delete(const JobID &job_id, const ID &id) { Log<ID, Data>::Delete(job_id, id); }
@@ -499,7 +499,7 @@ class Set : private Log<ID, Data>,
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const NotificationCallback &subscribe,
const SubscriptionCallback &done);
@@ -600,7 +600,7 @@ class HashInterface {
/// \param done SubscriptionCallback that is called when subscription is complete and
/// we are ready to receive messages.
/// \return Status
virtual Status Subscribe(const JobID &job_id, const ClientID &client_id,
virtual Status Subscribe(const JobID &job_id, const NodeID &client_id,
const HashNotificationCallback &subscribe,
const SubscriptionCallback &done) = 0;
@@ -628,7 +628,7 @@ class Hash : private Log<ID, Data>,
Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
const HashCallback &done) override;
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const HashNotificationCallback &subscribe,
const SubscriptionCallback &done) override;
@@ -656,7 +656,7 @@ class Hash : private Log<ID, Data>,
using Log<ID, Data>::num_lookups_;
};
class DynamicResourceTable : public Hash<ClientID, ResourceTableData> {
class DynamicResourceTable : public Hash<NodeID, ResourceTableData> {
public:
DynamicResourceTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
RedisGcsClient *client)
@@ -680,7 +680,7 @@ class ObjectTable : public Set<ObjectID, ObjectTableData> {
virtual ~ObjectTable(){};
};
class HeartbeatTable : public Table<ClientID, HeartbeatTableData> {
class HeartbeatTable : public Table<NodeID, HeartbeatTableData> {
public:
HeartbeatTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
RedisGcsClient *client)
@@ -691,7 +691,7 @@ class HeartbeatTable : public Table<ClientID, HeartbeatTableData> {
virtual ~HeartbeatTable() {}
};
class HeartbeatBatchTable : public Table<ClientID, HeartbeatBatchTableData> {
class HeartbeatBatchTable : public Table<NodeID, HeartbeatBatchTableData> {
public:
HeartbeatBatchTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
RedisGcsClient *client)
@@ -807,7 +807,7 @@ class TaskLeaseTable : public Table<TaskID, TaskLeaseData> {
/// Implement this method for the subscription tools class SubscriptionExecutor.
/// In this way TaskLeaseTable() can also reuse class SubscriptionExecutor.
Status Subscribe(const JobID &job_id, const ClientID &client_id,
Status Subscribe(const JobID &job_id, const NodeID &client_id,
const Callback &subscribe, const SubscriptionCallback &done);
};
@@ -882,8 +882,8 @@ class ProfileTable : public Log<UniqueID, ProfileTableData> {
/// alive. When a client disconnects, or if another client detects its failure,
/// it should append an entry to the log indicating that it is dead. A client
/// that is marked as dead should never again be marked as alive; if it needs
/// to reconnect, it must connect with a different ClientID.
class ClientTable : public Log<ClientID, GcsNodeInfo> {
/// to reconnect, it must connect with a different NodeID.
class ClientTable : public Log<NodeID, GcsNodeInfo> {
public:
ClientTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
RedisGcsClient *client)
@@ -920,10 +920,10 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
/// \param done Callback that is called once the node has been marked to
/// disconnected.
/// \return Status
ray::Status MarkDisconnected(const ClientID &dead_node_id, const WriteCallback &done);
ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done);
ray::Status SubscribeToNodeChange(
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
const StatusCallback &done);
/// Get a client's information from the cache. The cache only contains
@@ -934,12 +934,12 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
/// we have the client in the cache.
/// a nil client ID.
/// \return Whether the client is in the cache.
bool GetClient(const ClientID &client, GcsNodeInfo *node_info) const;
bool GetClient(const NodeID &client, GcsNodeInfo *node_info) const;
/// Get the local client's ID.
///
/// \return The local client's ID.
const ClientID &GetLocalClientId() const;
const NodeID &GetLocalClientId() const;
/// Get the local client's information.
///
@@ -950,12 +950,12 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
///
/// \param node_id The ID of the client to check.
/// \return Whether the client with ID node_id is removed.
bool IsRemoved(const ClientID &node_id) const;
bool IsRemoved(const NodeID &node_id) const;
/// Get the information of all clients.
///
/// \return The client ID to client information map.
const std::unordered_map<ClientID, GcsNodeInfo> &GetAllClients() const;
const std::unordered_map<NodeID, GcsNodeInfo> &GetAllClients() const;
/// Lookup the client data in the client table.
///
@@ -972,11 +972,11 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
/// The key at which the log of client information is stored. This key must
/// be kept the same across all instances of the ClientTable, so that all
/// clients append and read from the same key.
ClientID client_log_key_;
NodeID client_log_key_;
private:
using NodeChangeCallback =
std::function<void(const ClientID &id, const GcsNodeInfo &node_info)>;
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
/// Register a callback to call when a new node is added or a node is removed.
///
@@ -989,20 +989,20 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
/// Whether this client has called Disconnect().
bool disconnected_{false};
/// This node's ID. It will be initialized when we call method `Connect(...)`.
ClientID local_node_id_;
NodeID local_node_id_;
/// Information about this node.
GcsNodeInfo local_node_info_;
/// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and
/// RequestNotification.
/// `local_node_id_` is not used for this because it is only initialized
/// for registered nodes.
ClientID subscribe_id_{ClientID::FromRandom()};
NodeID subscribe_id_{NodeID::FromRandom()};
/// The callback to call when a new node is added or a node is removed.
NodeChangeCallback node_change_callback_{nullptr};
/// A cache for information about all nodes.
std::unordered_map<ClientID, GcsNodeInfo> node_cache_;
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
/// The set of removed nodes.
std::unordered_set<ClientID> removed_nodes_;
std::unordered_set<NodeID> removed_nodes_;
};
} // namespace gcs
+4 -4
View File
@@ -51,7 +51,7 @@ struct Mocker {
bool detached = false,
const std::string name = "") {
rpc::Address owner_address;
owner_address.set_raylet_id(ClientID::FromRandom().Binary());
owner_address.set_raylet_id(NodeID::FromRandom().Binary());
owner_address.set_ip_address("1234");
owner_address.set_port(5678);
owner_address.set_worker_id(WorkerID::FromRandom().Binary());
@@ -67,7 +67,7 @@ struct Mocker {
bool detached = false,
const std::string name = "") {
rpc::Address owner_address;
owner_address.set_raylet_id(ClientID::FromRandom().Binary());
owner_address.set_raylet_id(NodeID::FromRandom().Binary());
owner_address.set_ip_address("1234");
owner_address.set_port(5678);
owner_address.set_worker_id(WorkerID::FromRandom().Binary());
@@ -109,7 +109,7 @@ struct Mocker {
static std::shared_ptr<rpc::GcsNodeInfo> GenNodeInfo(
uint16_t port = 0, const std::string address = "127.0.0.1") {
auto node = std::make_shared<rpc::GcsNodeInfo>();
node->set_node_id(ClientID::FromRandom().Binary());
node->set_node_id(NodeID::FromRandom().Binary());
node->set_node_manager_port(port);
node->set_node_manager_address(address);
return node;
@@ -158,7 +158,7 @@ struct Mocker {
}
static std::shared_ptr<rpc::ProfileTableData> GenProfileTableData(
const ClientID &node_id) {
const NodeID &node_id) {
auto profile_table_data = std::make_shared<rpc::ProfileTableData>();
profile_table_data->set_component_id(node_id.Binary());
return profile_table_data;
+22 -22
View File
@@ -70,7 +70,7 @@ class TestGcs : public ::testing::Test {
};
TestGcs *test;
ClientID local_client_id = ClientID::FromRandom();
NodeID local_client_id = NodeID::FromRandom();
class TestGcsWithAsio : public TestGcs {
public:
@@ -666,7 +666,7 @@ class SetTestHelper {
// subscribed, we will append to the key several times and check that we get
// notified for each.
RAY_CHECK_OK(client->object_table().Subscribe(
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
// Run the event loop. The loop will only stop if the registered subscription
// callback is called (or an assertion failure).
@@ -1005,7 +1005,7 @@ class LogSubscribeTestHelper {
// subscribed, we will append to the key several times and check that we get
// notified for each.
RAY_CHECK_OK(client->job_table().Subscribe(
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
// Run the event loop. The loop will only stop if the registered subscription
// callback is called (or an assertion failure).
@@ -1187,16 +1187,16 @@ TEST_F(TestGcsWithAsio, TestSetSubscribeCancel) {
class ClientTableTestHelper {
public:
// Test helper that validates a single node-change notification.
//
// Asserts that:
//  - the notified ID and the ID embedded in `data` both equal the global
//    `local_client_id`;
//  - `data` reports ALIVE exactly when `is_alive` is true;
//  - the client table's cache has an entry for that ID, and the cached entry
//    carries the same ID and the same alive/dead state.
//
// \param client The GCS client whose client table cache is inspected.
// \param client_id The ID delivered with the notification.
// \param data The node info delivered with the notification.
// \param is_alive Whether the notification is expected to report ALIVE.
static void ClientTableNotification(std::shared_ptr<gcs::RedisGcsClient> client,
const ClientID &client_id, const GcsNodeInfo &data,
const NodeID &client_id, const GcsNodeInfo &data,
bool is_alive) {
ClientID added_id = local_client_id;
NodeID added_id = local_client_id;
ASSERT_EQ(client_id, added_id);
ASSERT_EQ(ClientID::FromBinary(data.node_id()), added_id);
ASSERT_EQ(NodeID::FromBinary(data.node_id()), added_id);
ASSERT_EQ(data.state() == GcsNodeInfo::ALIVE, is_alive);
// The notification must already be reflected in the client table's cache.
GcsNodeInfo cached_client;
ASSERT_TRUE(client->client_table().GetClient(added_id, &cached_client));
ASSERT_EQ(ClientID::FromBinary(cached_client.node_id()), added_id);
ASSERT_EQ(NodeID::FromBinary(cached_client.node_id()), added_id);
ASSERT_EQ(cached_client.state() == GcsNodeInfo::ALIVE, is_alive);
}
@@ -1205,7 +1205,7 @@ class ClientTableTestHelper {
// Subscribe to notifications for when a node gets added and removed. The latter
// event will stop the event loop.
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
[client](const ClientID &id, const GcsNodeInfo &data) {
[client](const NodeID &id, const GcsNodeInfo &data) {
// TODO(micafan)
RAY_LOG(INFO) << "Test alive=" << data.state() << " id=" << id;
if (data.state() == GcsNodeInfo::ALIVE) {
@@ -1231,7 +1231,7 @@ class ClientTableTestHelper {
// Register callbacks for when a client gets added and removed. The latter
// event will stop the event loop.
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
[client](const ClientID &id, const GcsNodeInfo &data) {
[client](const NodeID &id, const GcsNodeInfo &data) {
if (data.state() == GcsNodeInfo::ALIVE) {
ClientTableNotification(client, id, data, /*is_insertion=*/true);
// Disconnect from the client table. We should receive a notification
@@ -1260,7 +1260,7 @@ class ClientTableTestHelper {
// Register callbacks for when a client gets added and removed. The latter
// event will stop the event loop.
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
[client](const ClientID &id, const GcsNodeInfo &data) {
[client](const NodeID &id, const GcsNodeInfo &data) {
if (data.state() == GcsNodeInfo::ALIVE) {
ClientTableNotification(client, id, data, true);
} else {
@@ -1291,14 +1291,14 @@ class ClientTableTestHelper {
// Connect to the client table to start receiving notifications.
RAY_CHECK_OK(client->client_table().Connect(local_node_info));
// Mark a different client as dead.
ClientID dead_client_id = ClientID::FromRandom();
NodeID dead_client_id = NodeID::FromRandom();
RAY_CHECK_OK(client->client_table().MarkDisconnected(dead_client_id, nullptr));
// Make sure we only get a notification for the removal of the client we
// marked as dead.
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
[dead_client_id](const UniqueID &id, const GcsNodeInfo &data) {
if (data.state() == GcsNodeInfo::DEAD) {
ASSERT_EQ(ClientID::FromBinary(data.node_id()), dead_client_id);
ASSERT_EQ(NodeID::FromBinary(data.node_id()), dead_client_id);
test->Stop();
}
},
@@ -1332,7 +1332,7 @@ class HashTableTestHelper {
static void TestHashTable(const JobID &job_id,
std::shared_ptr<gcs::RedisGcsClient> client) {
uint64_t expected_count = 14;
ClientID client_id = ClientID::FromRandom();
NodeID client_id = NodeID::FromRandom();
// Prepare the first resource map: data_map1.
DynamicResourceTable::DataMap data_map1;
auto cpu_data = std::make_shared<ResourceTableData>();
@@ -1370,7 +1370,7 @@ class HashTableTestHelper {
};
auto notification_callback =
[data_map1, data_map2, compare_test, expected_count](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const std::vector<ResourceChangeNotification> &result) {
RAY_CHECK(result.size() == 1);
const ResourceChangeNotification &notification = result.back();
@@ -1399,13 +1399,13 @@ class HashTableTestHelper {
};
// Step 0: Subscribe the change of the hash table.
RAY_CHECK_OK(client->resource_table().Subscribe(
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
RAY_CHECK_OK(client->resource_table().RequestNotifications(job_id, client_id,
local_client_id, nullptr));
// Step 1: Add elements to the hash table.
auto update_callback1 = [data_map1, compare_test](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
compare_test(data_map1, callback_data);
test->IncrementNumCallbacks();
@@ -1413,7 +1413,7 @@ class HashTableTestHelper {
RAY_CHECK_OK(
client->resource_table().Update(job_id, client_id, data_map1, update_callback1));
auto lookup_callback1 = [data_map1, compare_test](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
compare_test(data_map1, callback_data);
test->IncrementNumCallbacks();
@@ -1423,14 +1423,14 @@ class HashTableTestHelper {
// Step 2: Decrease one element, increase one and add a new one.
RAY_CHECK_OK(client->resource_table().Update(job_id, client_id, data_map2, nullptr));
auto lookup_callback2 = [data_map2, compare_test](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
compare_test(data_map2, callback_data);
test->IncrementNumCallbacks();
};
RAY_CHECK_OK(client->resource_table().Lookup(job_id, client_id, lookup_callback2));
std::vector<std::string> delete_keys({"GPU", "CUSTOM", "None-Existent"});
auto remove_callback = [delete_keys](RedisGcsClient *client, const ClientID &id,
auto remove_callback = [delete_keys](RedisGcsClient *client, const NodeID &id,
const std::vector<std::string> &callback_data) {
for (size_t i = 0; i < callback_data.size(); ++i) {
// All deleting keys exist in this argument even if the key doesn't exist.
@@ -1444,7 +1444,7 @@ class HashTableTestHelper {
data_map3.erase("GPU");
data_map3.erase("CUSTOM");
auto lookup_callback3 = [data_map3, compare_test](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
compare_test(data_map3, callback_data);
test->IncrementNumCallbacks();
@@ -1455,7 +1455,7 @@ class HashTableTestHelper {
RAY_CHECK_OK(
client->resource_table().Update(job_id, client_id, data_map1, update_callback1));
auto lookup_callback4 = [data_map1, compare_test](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
compare_test(data_map1, callback_data);
test->IncrementNumCallbacks();
@@ -1466,7 +1466,7 @@ class HashTableTestHelper {
RAY_CHECK_OK(client->resource_table().RemoveEntries(
job_id, client_id, {"GPU", "CPU", "CUSTOM", "None-Existent"}, nullptr));
auto lookup_callback5 = [expected_count](
RedisGcsClient *client, const ClientID &id,
RedisGcsClient *client, const NodeID &id,
const DynamicResourceTable::DataMap &callback_data) {
ASSERT_EQ(callback_data.size(), 0);
test->IncrementNumCallbacks();
@@ -23,12 +23,12 @@ namespace ray {
namespace gcs {
class NodeDynamicResourceTest : public AccessorTestBase<ClientID, ResourceTableData> {
class NodeDynamicResourceTest : public AccessorTestBase<NodeID, ResourceTableData> {
protected:
typedef NodeInfoAccessor::ResourceMap ResourceMap;
virtual void GenTestData() {
for (size_t node_index = 0; node_index < node_number_; ++node_index) {
ClientID id = ClientID::FromRandom();
NodeID id = NodeID::FromRandom();
ResourceMap resource_map;
for (size_t rs_index = 0; rs_index < resource_type_number_; ++rs_index) {
std::shared_ptr<ResourceTableData> rs_data =
@@ -44,7 +44,7 @@ class NodeDynamicResourceTest : public AccessorTestBase<ClientID, ResourceTableD
}
}
std::unordered_map<ClientID, ResourceMap> id_to_resource_map_;
std::unordered_map<NodeID, ResourceMap> id_to_resource_map_;
size_t node_number_{100};
size_t resource_type_number_{5};
@@ -59,7 +59,7 @@ TEST_F(NodeDynamicResourceTest, UpdateAndGet) {
NodeInfoAccessor &node_accessor = gcs_client_->Nodes();
for (const auto &node_rs : id_to_resource_map_) {
++pending_count_;
const ClientID &id = node_rs.first;
const NodeID &id = node_rs.first;
// Update
Status status = node_accessor.AsyncUpdateResources(
node_rs.first, node_rs.second, [this, &node_accessor, id](Status status) {
@@ -95,7 +95,7 @@ TEST_F(NodeDynamicResourceTest, Delete) {
for (const auto &node_rs : id_to_resource_map_) {
++pending_count_;
const ClientID &id = node_rs.first;
const NodeID &id = node_rs.first;
// Delete
Status status = node_accessor.AsyncDeleteResources(
id, resource_to_delete_, [this, &node_accessor, id](Status status) {
@@ -128,7 +128,7 @@ TEST_F(NodeDynamicResourceTest, Subscribe) {
WaitPendingDone(wait_pending_timeout_);
auto subscribe = [this](const rpc::NodeResourceChange &notification) {
auto id = ClientID::FromBinary(notification.node_id());
auto id = NodeID::FromBinary(notification.node_id());
RAY_LOG(INFO) << "receive client id=" << id;
auto it = id_to_resource_map_.find(id);
ASSERT_TRUE(it != id_to_resource_map_.end());
@@ -32,7 +32,7 @@ class RedisObjectInfoAccessorTest : public AccessorTestBase<ObjectID, ObjectTabl
ObjectVector object_vec;
for (size_t j = 0; j < copy_count_; ++j) {
auto object = std::make_shared<ObjectTableData>();
ClientID node_id = ClientID::FromRandom();
NodeID node_id = NodeID::FromRandom();
object->set_manager(node_id.Binary());
object_vec.emplace_back(std::move(object));
}
@@ -55,7 +55,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
for (const auto &elem : object_id_to_data_) {
for (const auto &item : elem.second) {
++pending_count_;
ClientID node_id = ClientID::FromBinary(item->manager());
NodeID node_id = NodeID::FromBinary(item->manager());
RAY_CHECK_OK(
object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) {
RAY_CHECK_OK(status);
@@ -113,7 +113,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
++pending_count_;
++sub_pending_count;
const ObjectVector &object_vec = elem.second;
ClientID node_id = ClientID::FromBinary(object_vec[0]->manager());
NodeID node_id = NodeID::FromBinary(object_vec[0]->manager());
RAY_CHECK_OK(
object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) {
RAY_CHECK_OK(status);
@@ -103,12 +103,12 @@ class SubscriptionExecutorTest : public AccessorTestBase<ActorID, ActorTableData
// Checks AsyncSubscribeAll on the actor subscription executor: the first
// subscribe-all call must return an OK status, and a second subscribe-all call
// issued afterwards must return an Invalid status.
TEST_F(SubscriptionExecutorTest, SubscribeAllTest) {
// One subscribe-done callback is expected for the call below
// (presumably sub_done_ decrements this counter — confirm in the fixture).
++do_sub_pending_count_;
Status status =
actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
ASSERT_TRUE(status.ok());
// Expect one pending notification per entry in id_to_data_ once the actors
// are registered below.
sub_pending_count_ = id_to_data_.size();
AsyncRegisterActorToGcs();
// Repeating the subscribe-all request is expected to be rejected as Invalid.
status = actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
status = actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
ASSERT_TRUE(status.IsInvalid());
// Block until the notifications counted in sub_pending_count_ are consumed
// or the timeout elapses.
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
}
@@ -117,13 +117,13 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneWithClientIDTest) {
const auto &item = id_to_data_.begin();
++do_sub_pending_count_;
++sub_pending_count_;
Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
subscribe_, sub_done_);
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
ASSERT_TRUE(status.ok());
AsyncRegisterActorToGcs();
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
subscribe_, sub_done_);
ASSERT_TRUE(status.IsInvalid());
}
@@ -133,12 +133,12 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT
++do_sub_pending_count_;
++sub_pending_count_;
AsyncRegisterActorToGcs();
Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
subscribe_, sub_done_);
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
ASSERT_TRUE(status.ok());
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
subscribe_, sub_done_);
ASSERT_TRUE(status.IsInvalid());
}
@@ -146,11 +146,11 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT
TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) {
++do_sub_pending_count_;
Status status =
actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
ASSERT_TRUE(status.ok());
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
for (const auto &item : id_to_data_) {
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item.first,
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item.first,
subscribe_, sub_done_);
ASSERT_FALSE(status.ok());
}
@@ -160,7 +160,7 @@ TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) {
}
TEST_F(SubscriptionExecutorTest, UnsubscribeTest) {
ClientID client_id = ClientID::FromRandom();
NodeID client_id = NodeID::FromRandom();
Status status;
for (const auto &item : id_to_data_) {
status = actor_sub_executor_->AsyncUnsubscribe(client_id, item.first, unsub_done_);
+7 -7
View File
@@ -32,13 +32,13 @@ using ray::rpc::ObjectTableData;
bool UpdateObjectLocations(bool is_added,
const std::vector<ObjectTableData> &location_updates,
std::shared_ptr<gcs::GcsClient> gcs_client,
std::unordered_set<ClientID> *node_ids) {
std::unordered_set<NodeID> *node_ids) {
// location_updates contains the updates of locations of the object.
// with GcsChangeMode, we can determine whether the update mode is
// addition or deletion.
bool isUpdated = false;
for (const auto &object_table_data : location_updates) {
ClientID node_id = ClientID::FromBinary(object_table_data.manager());
NodeID node_id = NodeID::FromBinary(object_table_data.manager());
if (is_added && 0 == node_ids->count(node_id)) {
node_ids->insert(node_id);
isUpdated = true;
@@ -62,7 +62,7 @@ bool UpdateObjectLocations(bool is_added,
} // namespace
ray::Status ObjectDirectory::ReportObjectAdded(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) {
RAY_LOG(DEBUG) << "Reporting object added to GCS " << object_id;
ray::Status status =
@@ -71,7 +71,7 @@ ray::Status ObjectDirectory::ReportObjectAdded(
}
ray::Status ObjectDirectory::ReportObjectRemoved(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) {
RAY_LOG(DEBUG) << "Reporting object removed to GCS " << object_id;
ray::Status status =
@@ -83,7 +83,7 @@ void ObjectDirectory::LookupRemoteConnectionInfo(
RemoteConnectionInfo &connection_info) const {
auto node_info = gcs_client_->Nodes().Get(connection_info.client_id);
if (node_info) {
ClientID result_node_id = ClientID::FromBinary(node_info->node_id());
NodeID result_node_id = NodeID::FromBinary(node_info->node_id());
RAY_CHECK(result_node_id == connection_info.client_id);
if (node_info->state() == GcsNodeInfo::ALIVE) {
connection_info.ip = node_info->node_manager_address();
@@ -105,7 +105,7 @@ std::vector<RemoteConnectionInfo> ObjectDirectory::LookupAllRemoteConnections()
return remote_connections;
}
void ObjectDirectory::HandleClientRemoved(const ClientID &client_id) {
void ObjectDirectory::HandleClientRemoved(const NodeID &client_id) {
for (auto &listener : listeners_) {
const ObjectID &object_id = listener.first;
if (listener.second.current_object_locations.count(client_id) > 0) {
@@ -224,7 +224,7 @@ ray::Status ObjectDirectory::LookupLocations(const ObjectID &object_id,
RAY_CHECK(status.ok())
<< "Failed to get object location from GCS: " << status.message();
// Build the set of current locations based on the entries in the log.
std::unordered_set<ClientID> node_ids;
std::unordered_set<NodeID> node_ids;
UpdateObjectLocations(/*is_added*/ true, location_updates, gcs_client_,
&node_ids);
// It is safe to call the callback directly since this is already running
+11 -11
View File
@@ -29,13 +29,13 @@ namespace ray {
/// Connection information for remote object managers.
///
/// Bundles the id of a remote object manager with the ip and port used to
/// reach it. The constructor only sets client_id: ip is default-constructed
/// (empty) and port has no initializer, so both must be filled in before use.
struct RemoteConnectionInfo {
/// Create an entry for the given id; ip and port are not set here.
RemoteConnectionInfo(const ClientID &id) : client_id(id) {}
RemoteConnectionInfo(const NodeID &id) : client_id(id) {}
// Returns whether there is enough information to connect to the remote
// object manager.
// This is decided solely by whether ip is non-empty; port is not consulted.
bool Connected() const { return !ip.empty(); }
/// Id of the remote object manager this entry describes.
ClientID client_id;
NodeID client_id;
/// Address of the remote object manager; empty until it has been filled in.
std::string ip;
/// Port of the remote object manager.
/// NOTE(review): not initialized by the constructor, so its value is
/// indeterminate until assigned — only read it when Connected() is true.
uint16_t port;
};
@@ -60,7 +60,7 @@ class ObjectDirectoryInterface {
/// Callback for object location notifications.
using OnLocationsFound = std::function<void(const ray::ObjectID &object_id,
const std::unordered_set<ray::ClientID> &)>;
const std::unordered_set<ray::NodeID> &)>;
/// Lookup object locations. Callback may be invoked with an empty list of client ids.
///
@@ -76,9 +76,9 @@ class ObjectDirectoryInterface {
/// location, and fires the subscribed callbacks for those objects.
///
/// \param client_id The object manager client that was removed.
virtual void HandleClientRemoved(const ClientID &client_id) = 0;
virtual void HandleClientRemoved(const NodeID &client_id) = 0;
/// Subscribe to be notified of locations (ClientID) of the given object.
/// Subscribe to be notified of locations (NodeID) of the given object.
/// The callback will be invoked with the complete list of known locations
/// whenever the set of locations changes. The callback will also be fired if
/// the list of known locations is empty. The callback provided to this
@@ -113,7 +113,7 @@ class ObjectDirectoryInterface {
/// \param object_info Additional information about the object.
/// \return Status of whether this method succeeded.
virtual ray::Status ReportObjectAdded(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) = 0;
/// Report objects removed from this client's store to the object directory.
@@ -123,7 +123,7 @@ class ObjectDirectoryInterface {
/// \param object_info Additional information about the object.
/// \return Status of whether this method succeeded.
virtual ray::Status ReportObjectRemoved(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) = 0;
/// Returns debug string for class.
@@ -154,7 +154,7 @@ class ObjectDirectory : public ObjectDirectoryInterface {
const rpc::Address &owner_address,
const OnLocationsFound &callback) override;
void HandleClientRemoved(const ClientID &client_id) override;
void HandleClientRemoved(const NodeID &client_id) override;
ray::Status SubscribeObjectLocations(const UniqueID &callback_id,
const ObjectID &object_id,
@@ -164,10 +164,10 @@ class ObjectDirectory : public ObjectDirectoryInterface {
const ObjectID &object_id) override;
ray::Status ReportObjectAdded(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) override;
ray::Status ReportObjectRemoved(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) override;
std::string DebugString() const override;
@@ -181,7 +181,7 @@ class ObjectDirectory : public ObjectDirectoryInterface {
/// The callback to invoke when object locations are found.
std::unordered_map<UniqueID, OnLocationsFound> callbacks;
/// The current set of known locations of this object.
std::unordered_set<ClientID> current_object_locations;
std::unordered_set<NodeID> current_object_locations;
/// This flag will get set to true if received any notification of the object.
/// It means current_object_locations is up-to-date with GCS. It
/// should never go back to false once set to true. If this is true, and
+18 -19
View File
@@ -48,7 +48,7 @@ ObjectStoreRunner::~ObjectStoreRunner() {
}
}
ObjectManager::ObjectManager(asio::io_service &main_service, const ClientID &self_node_id,
ObjectManager::ObjectManager(asio::io_service &main_service, const NodeID &self_node_id,
const ObjectManagerConfig &config,
std::shared_ptr<ObjectDirectoryInterface> object_directory)
: self_node_id_(self_node_id),
@@ -184,7 +184,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id,
// no ordering guarantee between notifications.
return object_directory_->SubscribeObjectLocations(
object_directory_pull_callback_id_, object_id, owner_address,
[this](const ObjectID &object_id, const std::unordered_set<ClientID> &client_ids) {
[this](const ObjectID &object_id, const std::unordered_set<NodeID> &client_ids) {
// Exit if the Pull request has already been fulfilled or canceled.
auto it = pull_requests_.find(object_id);
if (it == pull_requests_.end()) {
@@ -195,7 +195,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id,
// we may end up sending a duplicate request to the same client as
// before.
it->second.client_locations =
std::vector<ClientID>(client_ids.begin(), client_ids.end());
std::vector<NodeID>(client_ids.begin(), client_ids.end());
if (it->second.client_locations.empty()) {
// The object locations are now empty, so we should wait for the next
// notification about a new object location. Cancel the timer until
@@ -244,7 +244,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) {
// Generate a random index.
std::uniform_int_distribution<int> distribution(0, node_vector.size() - 1);
int node_index = distribution(gen_);
ClientID node_id = node_vector[node_index];
NodeID node_id = node_vector[node_index];
// If the object manager somehow ended up choosing itself, choose a different
// object manager.
if (node_id == self_node_id_) {
@@ -309,7 +309,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) {
};
void ObjectManager::SendPullRequest(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
std::shared_ptr<rpc::ObjectManagerClient> rpc_client) {
rpc::PullRequest pull_request;
pull_request.set_object_id(object_id.Binary());
@@ -325,7 +325,7 @@ void ObjectManager::SendPullRequest(
}
void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id,
const ClientID &client_id) {
const NodeID &client_id) {
RAY_LOG(WARNING) << "Invalid Push request ObjectID: " << object_id
<< " after waiting for " << config_.push_timeout_ms << " ms.";
auto iter = unfulfilled_push_requests_.find(object_id);
@@ -337,10 +337,9 @@ void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id,
}
}
void ObjectManager::HandleSendFinished(const ObjectID &object_id,
const ClientID &client_id, uint64_t chunk_index,
double start_time, double end_time,
ray::Status status) {
void ObjectManager::HandleSendFinished(const ObjectID &object_id, const NodeID &client_id,
uint64_t chunk_index, double start_time,
double end_time, ray::Status status) {
RAY_LOG(DEBUG) << "HandleSendFinished on " << self_node_id_ << " to " << client_id
<< " of object " << object_id << " chunk " << chunk_index
<< ", status: " << status.ToString();
@@ -363,7 +362,7 @@ void ObjectManager::HandleSendFinished(const ObjectID &object_id,
}
void ObjectManager::HandleReceiveFinished(const ObjectID &object_id,
const ClientID &client_id, uint64_t chunk_index,
const NodeID &client_id, uint64_t chunk_index,
double start_time, double end_time,
ray::Status status) {
if (!status.ok()) {
@@ -385,7 +384,7 @@ void ObjectManager::HandleReceiveFinished(const ObjectID &object_id,
profile_events_.push_back(profile_event);
}
void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) {
void ObjectManager::Push(const ObjectID &object_id, const NodeID &client_id) {
RAY_LOG(DEBUG) << "Push on " << self_node_id_ << " to " << client_id << " of object "
<< object_id;
if (local_objects_.count(object_id) == 0) {
@@ -483,7 +482,7 @@ void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) {
ray::Status ObjectManager::SendObjectChunk(
const UniqueID &push_id, const ObjectID &object_id, const rpc::Address &owner_address,
const ClientID &client_id, uint64_t data_size, uint64_t metadata_size,
const NodeID &client_id, uint64_t data_size, uint64_t metadata_size,
uint64_t chunk_index, std::shared_ptr<rpc::ObjectManagerClient> rpc_client) {
double start_time = absl::GetCurrentTimeNanos() / 1e9;
rpc::PushRequest push_request;
@@ -606,7 +605,7 @@ ray::Status ObjectManager::LookupRemainingWaitObjects(const UniqueID &wait_id) {
RAY_RETURN_NOT_OK(object_directory_->LookupLocations(
object_id, wait_state.owner_addresses[object_id],
[this, wait_id](const ObjectID &lookup_object_id,
const std::unordered_set<ClientID> &client_ids) {
const std::unordered_set<NodeID> &client_ids) {
auto &wait_state = active_wait_requests_.find(wait_id)->second;
// Note that the object is guaranteed to be added to local_objects_ before
// the notification is triggered.
@@ -647,7 +646,7 @@ void ObjectManager::SubscribeRemainingWaitObjects(const UniqueID &wait_id) {
RAY_CHECK_OK(object_directory_->SubscribeObjectLocations(
wait_id, object_id, wait_state.owner_addresses[object_id],
[this, wait_id](const ObjectID &subscribe_object_id,
const std::unordered_set<ClientID> &client_ids) {
const std::unordered_set<NodeID> &client_ids) {
auto object_id_wait_state = active_wait_requests_.find(wait_id);
if (object_id_wait_state == active_wait_requests_.end()) {
// Depending on the timing of calls to the object directory, we
@@ -738,7 +737,7 @@ void ObjectManager::WaitComplete(const UniqueID &wait_id) {
void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ObjectID object_id = ObjectID::FromBinary(request.object_id());
ClientID client_id = ClientID::FromBinary(request.client_id());
NodeID client_id = NodeID::FromBinary(request.client_id());
// Serialize.
uint64_t chunk_index = request.chunk_index();
@@ -756,7 +755,7 @@ void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply *
send_reply_callback(status, nullptr, nullptr);
}
ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id,
ray::Status ObjectManager::ReceiveObjectChunk(const NodeID &client_id,
const ObjectID &object_id,
const rpc::Address &owner_address,
uint64_t data_size, uint64_t metadata_size,
@@ -787,7 +786,7 @@ ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id,
void ObjectManager::HandlePull(const rpc::PullRequest &request, rpc::PullReply *reply,
rpc::SendReplyCallback send_reply_callback) {
ObjectID object_id = ObjectID::FromBinary(request.object_id());
ClientID client_id = ClientID::FromBinary(request.client_id());
NodeID client_id = NodeID::FromBinary(request.client_id());
RAY_LOG(DEBUG) << "Received pull request from client " << client_id << " for object ["
<< object_id << "].";
@@ -855,7 +854,7 @@ void ObjectManager::SpreadFreeObjectsRequest(
}
std::shared_ptr<rpc::ObjectManagerClient> ObjectManager::GetRpcClient(
const ClientID &client_id) {
const NodeID &client_id) {
auto it = remote_object_manager_clients_.find(client_id);
if (it == remote_object_manager_clients_.end()) {
RemoteConnectionInfo connection_info(client_id);
+17 -22
View File
@@ -26,10 +26,6 @@
#include <random>
#include <thread>
#include <boost/asio.hpp>
#include <boost/asio/error.hpp>
#include <boost/bind.hpp>
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/time/clock.h"
@@ -80,7 +76,7 @@ struct LocalObjectInfo {
object_manager::protocol::ObjectInfoT object_info;
/// A map from the ID of a remote object manager to the timestamp of when
/// the object was last pushed to that object manager (if a push took place).
std::unordered_map<ClientID, int64_t> recent_pushes;
std::unordered_map<NodeID, int64_t> recent_pushes;
};
class ObjectStoreRunner {
@@ -146,9 +142,9 @@ class ObjectManager : public ObjectManagerInterface,
/// \param chunk_index Chunk index of this object chunk, start with 0
/// \param rpc_client Rpc client used to send message to remote object manager
ray::Status SendObjectChunk(const UniqueID &push_id, const ObjectID &object_id,
const rpc::Address &owner_address,
const ClientID &client_id, uint64_t data_size,
uint64_t metadata_size, uint64_t chunk_index,
const rpc::Address &owner_address, const NodeID &client_id,
uint64_t data_size, uint64_t metadata_size,
uint64_t chunk_index,
std::shared_ptr<rpc::ObjectManagerClient> rpc_client);
/// Receive object chunk from remote object manager, small object may contain one chunk
@@ -160,7 +156,7 @@ class ObjectManager : public ObjectManagerInterface,
/// \param metadata_size Metadata size
/// \param chunk_index Chunk index
/// \param data Chunk data
ray::Status ReceiveObjectChunk(const ClientID &client_id, const ObjectID &object_id,
ray::Status ReceiveObjectChunk(const NodeID &client_id, const ObjectID &object_id,
const rpc::Address &owner_address, uint64_t data_size,
uint64_t metadata_size, uint64_t chunk_index,
const std::string &data);
@@ -169,13 +165,13 @@ class ObjectManager : public ObjectManagerInterface,
///
/// \param object_id Object id
/// \param client_id Remote server client id
void SendPullRequest(const ObjectID &object_id, const ClientID &client_id,
void SendPullRequest(const ObjectID &object_id, const NodeID &client_id,
std::shared_ptr<rpc::ObjectManagerClient> rpc_client);
/// Get the rpc client according to the client ID
///
/// \param client_id Remote client id, will send rpc request to it
std::shared_ptr<rpc::ObjectManagerClient> GetRpcClient(const ClientID &client_id);
std::shared_ptr<rpc::ObjectManagerClient> GetRpcClient(const NodeID &client_id);
/// Report the port of the object manager rpc server.
///
/// \return The port value returned by the rpc server's GetPort().
int GetServerPort() const {
  const auto server_port = object_manager_server_.GetPort();
  return server_port;
}
@@ -189,7 +185,7 @@ class ObjectManager : public ObjectManagerInterface,
/// \param config ObjectManager configuration.
/// \param object_directory An object implementing the object directory interface.
explicit ObjectManager(boost::asio::io_service &main_service,
const ClientID &self_node_id, const ObjectManagerConfig &config,
const NodeID &self_node_id, const ObjectManagerConfig &config,
std::shared_ptr<ObjectDirectoryInterface> object_directory);
~ObjectManager();
@@ -221,9 +217,9 @@ class ObjectManager : public ObjectManagerInterface,
/// \param object_id The object's object id.
/// \param client_id The remote node's client id.
/// \return Void.
void Push(const ObjectID &object_id, const ClientID &client_id);
void Push(const ObjectID &object_id, const NodeID &client_id);
/// Pull an object from ClientID.
/// Pull an object from NodeID.
///
/// \param object_id The object's object id.
/// \return Status of whether the pull request successfully initiated.
@@ -294,7 +290,7 @@ class ObjectManager : public ObjectManagerInterface,
PullRequest() : retry_timer(nullptr), timer_set(false), client_locations() {}
std::unique_ptr<boost::asio::deadline_timer> retry_timer;
bool timer_set;
std::vector<ClientID> client_locations;
std::vector<NodeID> client_locations;
};
struct WaitState {
@@ -377,7 +373,7 @@ class ObjectManager : public ObjectManagerInterface,
/// chunk.
/// \param status The status of the send (e.g., did it succeed or fail).
/// \return Void.
void HandleSendFinished(const ObjectID &object_id, const ClientID &client_id,
void HandleSendFinished(const ObjectID &object_id, const NodeID &client_id,
uint64_t chunk_index, double start_time_us, double end_time_us,
ray::Status status);
@@ -393,14 +389,14 @@ class ObjectManager : public ObjectManagerInterface,
/// chunk.
/// \param status The status of the receive (e.g., did it succeed or fail).
/// \return Void.
void HandleReceiveFinished(const ObjectID &object_id, const ClientID &client_id,
void HandleReceiveFinished(const ObjectID &object_id, const NodeID &client_id,
uint64_t chunk_index, double start_time_us,
double end_time_us, ray::Status status);
/// Handle Push task timeout.
void HandlePushTaskTimeout(const ObjectID &object_id, const ClientID &client_id);
void HandlePushTaskTimeout(const ObjectID &object_id, const NodeID &client_id);
ClientID self_node_id_;
NodeID self_node_id_;
const ObjectManagerConfig config_;
std::shared_ptr<ObjectDirectoryInterface> object_directory_;
// Object store runner.
@@ -439,8 +435,7 @@ class ObjectManager : public ObjectManagerInterface,
/// Maintains a map of push requests that have not been fulfilled due to an object not
/// being local. Objects are removed from this map after push_timeout_ms have elapsed.
std::unordered_map<
ObjectID,
std::unordered_map<ClientID, std::unique_ptr<boost::asio::deadline_timer>>>
ObjectID, std::unordered_map<NodeID, std::unique_ptr<boost::asio::deadline_timer>>>
unfulfilled_push_requests_;
/// The objects that this object manager is currently trying to fetch from
@@ -468,7 +463,7 @@ class ObjectManager : public ObjectManagerInterface,
rpc::ClientCallManager client_call_manager_;
/// Client id - object manager gRPC client.
std::unordered_map<ClientID, std::shared_ptr<rpc::ObjectManagerClient>>
std::unordered_map<NodeID, std::shared_ptr<rpc::ObjectManagerClient>>
remote_object_manager_clients_;
/// Running sum of the amount of memory used in the object store.
@@ -24,7 +24,7 @@ namespace {
/// Filter out the removed clients from the object locations.
void FilterRemovedClients(std::shared_ptr<gcs::GcsClient> gcs_client,
std::unordered_set<ClientID> *node_ids) {
std::unordered_set<NodeID> *node_ids) {
for (auto it = node_ids->begin(); it != node_ids->end();) {
if (gcs_client->Nodes().IsRemoved(*it)) {
it = node_ids->erase(it);
@@ -64,7 +64,7 @@ std::shared_ptr<rpc::CoreWorkerClient> OwnershipBasedObjectDirectory::GetClient(
}
ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) {
WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id);
rpc::Address owner_address = GetOwnerAddressFromObjectInfo(object_info);
@@ -91,7 +91,7 @@ ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded(
}
ray::Status OwnershipBasedObjectDirectory::ReportObjectRemoved(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) {
WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id);
rpc::Address owner_address = GetOwnerAddressFromObjectInfo(object_info);
@@ -126,9 +126,9 @@ void OwnershipBasedObjectDirectory::SubscriptionCallback(
return;
}
std::unordered_set<ClientID> client_ids;
std::unordered_set<NodeID> client_ids;
for (auto const &client_id : reply.client_ids()) {
client_ids.emplace(ClientID::FromBinary(client_id));
client_ids.emplace(NodeID::FromBinary(client_id));
}
FilterRemovedClients(gcs_client_, &client_ids);
if (client_ids != it->second.current_object_locations) {
@@ -208,7 +208,7 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations(
RAY_LOG(WARNING) << "Object " << object_id << " does not have owner. "
<< "LookupLocations returns an empty list of locations.";
io_service_.post(
[callback, object_id]() { callback(object_id, std::unordered_set<ClientID>()); });
[callback, object_id]() { callback(object_id, std::unordered_set<NodeID>()); });
return Status::OK();
}
@@ -223,9 +223,9 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations(
RAY_LOG(ERROR) << "Worker " << worker_id << " failed to get the location for "
<< object_id;
}
std::unordered_set<ClientID> client_ids;
std::unordered_set<NodeID> client_ids;
for (auto const &client_id : reply.client_ids()) {
client_ids.emplace(ClientID::FromBinary(client_id));
client_ids.emplace(NodeID::FromBinary(client_id));
}
FilterRemovedClients(gcs_client_, &client_ids);
callback(object_id, client_ids);
@@ -56,10 +56,10 @@ class OwnershipBasedObjectDirectory : public ObjectDirectory {
const ObjectID &object_id) override;
ray::Status ReportObjectAdded(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) override;
ray::Status ReportObjectRemoved(
const ObjectID &object_id, const ClientID &client_id,
const ObjectID &object_id, const NodeID &client_id,
const object_manager::protocol::ObjectInfoT &object_info) override;
std::string DebugString() const override;
+2 -2
View File
@@ -34,7 +34,7 @@
namespace plasma {
using ray::ObjectID;
using ray::ClientID;
using ray::NodeID;
using ray::WorkerID;
/// Where an object is reported to be, stored as a 32-bit integer.
/// Enumerator values are implicit and start at 0 in declaration order.
/// NOTE(review): the precise meaning of each state is set by the code that
/// uses this enum — confirm there before relying on the names alone.
enum class ObjectLocation : int32_t {
  Local,
  Remote,
  Nonexistent,
};
@@ -81,7 +81,7 @@ struct ObjectTableEntry {
/// Number of clients currently using this object.
int ref_count;
/// Owner's raylet ID.
ClientID owner_raylet_id;
NodeID owner_raylet_id;
/// Owner's IP address.
std::string owner_ip_address;
/// Owner's port.
+2 -2
View File
@@ -206,7 +206,7 @@ Status SendCreateRequest(const std::shared_ptr<StoreConn> &store_conn, ObjectID
}
Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
ClientID* owner_raylet_id, std::string* owner_ip_address,
NodeID* owner_raylet_id, std::string* owner_ip_address,
int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full,
int64_t* data_size, int64_t* metadata_size,
int* device_num) {
@@ -217,7 +217,7 @@ Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
*data_size = message->data_size();
*metadata_size = message->metadata_size();
*object_id = ObjectID::FromBinary(message->object_id()->str());
*owner_raylet_id = ClientID::FromBinary(message->owner_raylet_id()->str());
*owner_raylet_id = NodeID::FromBinary(message->owner_raylet_id()->str());
*owner_ip_address = message->owner_ip_address()->str();
*owner_port = message->owner_port();
*owner_worker_id = WorkerID::FromBinary(message->owner_worker_id()->str());
+1 -1
View File
@@ -85,7 +85,7 @@ Status SendCreateRequest(const std::shared_ptr<StoreConn> &store_conn, ObjectID
int64_t data_size, int64_t metadata_size, int device_num);
Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
ClientID* owner_raylet_id, std::string* owner_ip_address,
NodeID* owner_raylet_id, std::string* owner_ip_address,
int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full,
int64_t* data_size, int64_t* metadata_size,
int* device_num);
+2 -2
View File
@@ -227,7 +227,7 @@ Status PlasmaStore::FreeCudaMemory(int device_num, int64_t size, uint8_t* pointe
// Create a new object buffer in the hash table.
PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id,
const ClientID& owner_raylet_id,
const NodeID& owner_raylet_id,
const std::string& owner_ip_address,
int owner_port, const WorkerID& owner_worker_id,
bool evict_if_full, int64_t data_size,
@@ -884,7 +884,7 @@ Status PlasmaStore::ProcessMessage(const std::shared_ptr<Client> &client,
// Process the different types of requests.
switch (type) {
case fb::MessageType::PlasmaCreateRequest: {
ClientID owner_raylet_id;
NodeID owner_raylet_id;
std::string owner_ip_address;
int owner_port;
WorkerID owner_worker_id;
+1 -1
View File
@@ -94,7 +94,7 @@ class PlasmaStore {
/// - PlasmaError::OutOfMemory, if the store is out of memory and
/// cannot create the object. In this case, the client should not call
/// plasma_release.
PlasmaError CreateObject(const ObjectID& object_id, const ClientID& owner_raylet_id,
PlasmaError CreateObject(const ObjectID& object_id, const NodeID& owner_raylet_id,
const std::string& owner_ip_address, int owner_port,
const WorkerID& owner_worker_id, bool evict_if_full,
int64_t data_size, int64_t metadata_size, int device_num,
@@ -50,7 +50,7 @@ class MockServer {
MockServer(boost::asio::io_service &main_service,
const ObjectManagerConfig &object_manager_config,
std::shared_ptr<gcs::GcsClient> gcs_client)
: node_id_(ClientID::FromRandom()),
: node_id_(NodeID::FromRandom()),
config_(object_manager_config),
gcs_client_(gcs_client),
object_manager_(main_service, node_id_, object_manager_config,
@@ -75,7 +75,7 @@ class MockServer {
friend class StressTestObjectManager;
ClientID node_id_;
NodeID node_id_;
ObjectManagerConfig config_;
std::shared_ptr<gcs::GcsClient> gcs_client_;
ObjectManager object_manager_;
@@ -199,8 +199,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
int num_connected_clients = 0;
ClientID node_id_1;
ClientID node_id_2;
NodeID node_id_1;
NodeID node_id_2;
int64_t start_time;
@@ -208,7 +208,7 @@ class StressTestObjectManager : public TestObjectManagerBase {
node_id_1 = gcs_client_1->Nodes().GetSelfId();
node_id_2 = gcs_client_2->Nodes().GetSelfId();
RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
[this](const ClientID &node_id, const GcsNodeInfo &data) {
[this](const NodeID &node_id, const GcsNodeInfo &data) {
if (node_id == node_id_1 || node_id == node_id_2) {
num_connected_clients += 1;
}
@@ -218,7 +218,7 @@ class StressTestObjectManager : public TestObjectManagerBase {
},
nullptr));
RAY_CHECK_OK(gcs_client_2->Nodes().AsyncSubscribeToNodeChange(
[this](const ClientID &node_id, const GcsNodeInfo &data) {
[this](const NodeID &node_id, const GcsNodeInfo &data) {
if (node_id == node_id_1 || node_id == node_id_2) {
num_connected_clients += 1;
}
@@ -310,8 +310,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
void TransferTestExecute(int num_trials, int64_t data_size,
TransferPattern transfer_pattern) {
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
ray::Status status = ray::Status::OK();
@@ -388,8 +388,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
RAY_LOG(DEBUG) << "\n"
<< "Server node ids:"
<< "\n";
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
RAY_LOG(DEBUG) << "Server 1: " << node_id_1 << "\n"
<< "Server 2: " << node_id_2;
@@ -397,11 +397,11 @@ class StressTestObjectManager : public TestObjectManagerBase {
<< "All connected nodes:"
<< "\n";
auto data = gcs_client_1->Nodes().Get(node_id_1);
RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data->node_id()) << "\n"
RAY_LOG(DEBUG) << "NodeID=" << NodeID::FromBinary(data->node_id()) << "\n"
<< "NodeIp=" << data->node_manager_address() << "\n"
<< "NodePort=" << data->node_manager_port();
auto data2 = gcs_client_1->Nodes().Get(node_id_2);
RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data2->node_id()) << "\n"
RAY_LOG(DEBUG) << "NodeID=" << NodeID::FromBinary(data2->node_id()) << "\n"
<< "NodeIp=" << data2->node_manager_address() << "\n"
<< "NodePort=" << data2->node_manager_port();
}
@@ -46,7 +46,7 @@ class MockServer {
MockServer(boost::asio::io_service &main_service,
const ObjectManagerConfig &object_manager_config,
std::shared_ptr<gcs::GcsClient> gcs_client)
: node_id_(ClientID::FromRandom()),
: node_id_(NodeID::FromRandom()),
config_(object_manager_config),
gcs_client_(gcs_client),
object_manager_(main_service, node_id_, object_manager_config,
@@ -71,7 +71,7 @@ class MockServer {
friend class TestObjectManager;
ClientID node_id_;
NodeID node_id_;
ObjectManagerConfig config_;
std::shared_ptr<gcs::GcsClient> gcs_client_;
ObjectManager object_manager_;
@@ -180,8 +180,8 @@ class TestObjectManager : public TestObjectManagerBase {
public:
int current_wait_test = -1;
int num_connected_clients = 0;
ClientID node_id_1;
ClientID node_id_2;
NodeID node_id_1;
NodeID node_id_2;
ObjectID created_object_id1;
ObjectID created_object_id2;
@@ -192,7 +192,7 @@ class TestObjectManager : public TestObjectManagerBase {
node_id_1 = gcs_client_1->Nodes().GetSelfId();
node_id_2 = gcs_client_2->Nodes().GetSelfId();
RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
[this](const ClientID &node_id, const GcsNodeInfo &data) {
[this](const NodeID &node_id, const GcsNodeInfo &data) {
if (node_id == node_id_1 || node_id == node_id_2) {
num_connected_clients += 1;
}
@@ -264,7 +264,7 @@ class TestObjectManager : public TestObjectManagerBase {
sub_id, object_1, rpc::Address(),
[this, sub_id, object_1, object_2](
const ray::ObjectID &object_id,
const std::unordered_set<ray::ClientID> &clients) {
const std::unordered_set<ray::NodeID> &clients) {
if (!clients.empty()) {
TestWaitWhileSubscribed(sub_id, object_1, object_2);
}
@@ -430,16 +430,16 @@ class TestObjectManager : public TestObjectManagerBase {
<< "Server node ids:"
<< "\n";
auto data = gcs_client_1->Nodes().Get(node_id_1);
RAY_LOG(DEBUG) << (ClientID::FromBinary(data->node_id()).IsNil());
RAY_LOG(DEBUG) << "Server 1 NodeID=" << ClientID::FromBinary(data->node_id());
RAY_LOG(DEBUG) << (NodeID::FromBinary(data->node_id()).IsNil());
RAY_LOG(DEBUG) << "Server 1 NodeID=" << NodeID::FromBinary(data->node_id());
RAY_LOG(DEBUG) << "Server 1 NodeIp=" << data->node_manager_address();
RAY_LOG(DEBUG) << "Server 1 NodePort=" << data->node_manager_port();
ASSERT_EQ(node_id_1, ClientID::FromBinary(data->node_id()));
ASSERT_EQ(node_id_1, NodeID::FromBinary(data->node_id()));
auto data2 = gcs_client_1->Nodes().Get(node_id_2);
RAY_LOG(DEBUG) << "Server 2 NodeID=" << ClientID::FromBinary(data2->node_id());
RAY_LOG(DEBUG) << "Server 2 NodeID=" << NodeID::FromBinary(data2->node_id());
RAY_LOG(DEBUG) << "Server 2 NodeIp=" << data2->node_manager_address();
RAY_LOG(DEBUG) << "Server 2 NodePort=" << data2->node_manager_port();
ASSERT_EQ(node_id_2, ClientID::FromBinary(data2->node_id()));
ASSERT_EQ(node_id_2, NodeID::FromBinary(data2->node_id()));
}
};
+2 -2
View File
@@ -49,8 +49,8 @@ ActorRegistration::ActorRegistration(const ActorTableData &actor_table_data,
}
}
const ClientID ActorRegistration::GetNodeManagerId() const {
return ClientID::FromBinary(actor_table_data_.address().raylet_id());
const NodeID ActorRegistration::GetNodeManagerId() const {
return NodeID::FromBinary(actor_table_data_.address().raylet_id());
}
const ObjectID ActorRegistration::GetActorCreationDependency() const {
+1 -1
View File
@@ -77,7 +77,7 @@ class ActorRegistration {
///
/// \return The actor's node manager location. All tasks for the actor should
/// be forwarded to this node.
const ClientID GetNodeManagerId() const;
const NodeID GetNodeManagerId() const;
/// Get the object that represents the actor's initial state. This is the
/// execution dependency returned by this actor's creation task. If
+1 -1
View File
@@ -33,7 +33,7 @@ typedef std::function<std::shared_ptr<boost::asio::deadline_timer>(std::function
class AgentManager : public rpc::AgentManagerServiceHandler {
public:
struct Options {
const ClientID node_id;
const NodeID node_id;
std::vector<std::string> agent_commands;
};
+1 -1
View File
@@ -154,7 +154,7 @@ table RegisterClientReply {
success: bool;
// The reason of registration failure.
failure_reason: string;
// GCS ClientID of the local node manager.
// GCS NodeID of the local node manager.
raylet_id: string;
// Port that this worker should listen on.
port: int;
+24 -26
View File
@@ -117,9 +117,8 @@ std::string WorkerOwnerString(std::shared_ptr<WorkerInterface> &worker) {
return buffer.str();
}
NodeManager::NodeManager(boost::asio::io_service &io_service,
const ClientID &self_node_id, const NodeManagerConfig &config,
ObjectManager &object_manager,
NodeManager::NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id,
const NodeManagerConfig &config, ObjectManager &object_manager,
std::shared_ptr<gcs::GcsClient> gcs_client,
std::shared_ptr<ObjectDirectoryInterface> object_directory)
: self_node_id_(self_node_id),
@@ -194,7 +193,7 @@ NodeManager::NodeManager(boost::asio::io_service &io_service,
return args_ready;
};
auto get_node_info_func = [this](const ClientID &node_id) {
auto get_node_info_func = [this](const NodeID &node_id) {
return gcs_client_->Nodes().Get(node_id);
};
cluster_task_manager_ = std::shared_ptr<ClusterTaskManager>(
@@ -231,7 +230,7 @@ ray::Status NodeManager::RegisterGcs() {
RAY_RETURN_NOT_OK(
gcs_client_->Actors().AsyncSubscribeAll(actor_notification_callback, nullptr));
auto on_node_change = [this](const ClientID &node_id, const GcsNodeInfo &data) {
auto on_node_change = [this](const NodeID &node_id, const GcsNodeInfo &data) {
if (data.state() == GcsNodeInfo::ALIVE) {
NodeAdded(data);
} else {
@@ -247,7 +246,7 @@ ray::Status NodeManager::RegisterGcs() {
// Subscribe to resource changes.
const auto &resources_changed =
[this](const rpc::NodeResourceChange &resource_notification) {
auto id = ClientID::FromBinary(resource_notification.node_id());
auto id = NodeID::FromBinary(resource_notification.node_id());
if (resource_notification.updated_resources_size() != 0) {
ResourceSet resource_set(
MapFromProtobuf(resource_notification.updated_resources()));
@@ -720,7 +719,7 @@ void NodeManager::GetObjectManagerProfileInfo() {
}
void NodeManager::NodeAdded(const GcsNodeInfo &node_info) {
const ClientID node_id = ClientID::FromBinary(node_info.node_id());
const NodeID node_id = NodeID::FromBinary(node_info.node_id());
RAY_LOG(DEBUG) << "[NodeAdded] Received callback from client id " << node_id;
if (1 == cluster_resource_map_.count(node_id)) {
@@ -761,7 +760,7 @@ void NodeManager::NodeAdded(const GcsNodeInfo &node_info) {
void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) {
// TODO(swang): If we receive a notification for our own death, clean up and
// exit immediately.
const ClientID node_id = ClientID::FromBinary(node_info.node_id());
const NodeID node_id = NodeID::FromBinary(node_info.node_id());
RAY_LOG(DEBUG) << "[NodeRemoved] Received callback from client id " << node_id;
RAY_CHECK(node_id != self_node_id_)
@@ -810,7 +809,7 @@ void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) {
void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) {
const WorkerID worker_id = WorkerID::FromBinary(address.worker_id());
const ClientID node_id = ClientID::FromBinary(address.raylet_id());
const NodeID node_id = NodeID::FromBinary(address.raylet_id());
if (!worker_id.IsNil()) {
RAY_LOG(DEBUG) << "Worker " << worker_id << " failed";
failed_workers_cache_.insert(worker_id);
@@ -850,7 +849,7 @@ void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) {
}
}
void NodeManager::ResourceCreateUpdated(const ClientID &client_id,
void NodeManager::ResourceCreateUpdated(const NodeID &client_id,
const ResourceSet &createUpdatedResources) {
RAY_LOG(DEBUG) << "[ResourceCreateUpdated] received callback from client id "
<< client_id << " with created or updated resources: "
@@ -882,7 +881,7 @@ void NodeManager::ResourceCreateUpdated(const ClientID &client_id,
return;
}
void NodeManager::ResourceDeleted(const ClientID &client_id,
void NodeManager::ResourceDeleted(const NodeID &client_id,
const std::vector<std::string> &resource_names) {
if (RAY_LOG_ENABLED(DEBUG)) {
std::ostringstream oss;
@@ -929,7 +928,7 @@ void NodeManager::TryLocalInfeasibleTaskScheduling() {
}
}
void NodeManager::HeartbeatAdded(const ClientID &client_id,
void NodeManager::HeartbeatAdded(const NodeID &client_id,
const HeartbeatTableData &heartbeat_data) {
// Locate the client id in remote client table and update available resources based on
// the received heartbeat information.
@@ -1009,7 +1008,7 @@ void NodeManager::HeartbeatAdded(const ClientID &client_id,
void NodeManager::HeartbeatBatchAdded(const HeartbeatBatchTableData &heartbeat_batch) {
// Update load information provided by each heartbeat.
for (const auto &heartbeat_data : heartbeat_batch.batch()) {
const ClientID &client_id = ClientID::FromBinary(heartbeat_data.client_id());
const NodeID &client_id = NodeID::FromBinary(heartbeat_data.client_id());
if (client_id == self_node_id_) {
// Skip heartbeats from self.
continue;
@@ -1790,7 +1789,7 @@ void NodeManager::HandleRequestWorkerLease(const rpc::RequestWorkerLeaseRequest
leased_workers_[worker_id] = worker;
});
task.OnSpillbackInstead(
[reply, task_id, send_reply_callback](const ClientID &spillback_to,
[reply, task_id, send_reply_callback](const NodeID &spillback_to,
const std::string &address, int port) {
RAY_LOG(DEBUG) << "Worker lease request SPILLBACK " << task_id;
reply->mutable_retry_at_raylet_address()->set_ip_address(address);
@@ -2012,7 +2011,7 @@ void NodeManager::ProcessSetResourceRequest(
double const &capacity = message->capacity();
bool is_deletion = capacity <= 0;
ClientID node_id = from_flatbuf<ClientID>(*message->client_id());
NodeID node_id = from_flatbuf<NodeID>(*message->client_id());
// If the python arg was null, set node_id to the local node id.
if (node_id.IsNil()) {
@@ -2044,7 +2043,7 @@ void NodeManager::ProcessSetResourceRequest(
}
bool NodeManager::PrepareBundle(
std::unordered_map<ClientID, SchedulingResources> &resource_map,
std::unordered_map<NodeID, SchedulingResources> &resource_map,
const BundleSpecification &bundle_spec) {
// TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent
// once retry is implemented. If the resource map contains the local raylet, update load
@@ -2080,7 +2079,7 @@ bool NodeManager::PrepareBundle(
}
void NodeManager::CommitBundle(
std::unordered_map<ClientID, SchedulingResources> &resource_map,
std::unordered_map<NodeID, SchedulingResources> &resource_map,
const BundleSpecification &bundle_spec) {
// TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent
// once retry is implemented.
@@ -2106,7 +2105,7 @@ void NodeManager::CommitBundle(
}
void NodeManager::ScheduleTasks(
std::unordered_map<ClientID, SchedulingResources> &resource_map) {
std::unordered_map<NodeID, SchedulingResources> &resource_map) {
// If the resource map contains the local raylet, update load before calling policy.
if (resource_map.count(self_node_id_) > 0) {
resource_map[self_node_id_].SetLoadResources(local_queues_.GetTotalResourceLoad());
@@ -2118,17 +2117,17 @@ void NodeManager::ScheduleTasks(
RAY_LOG(DEBUG) << "[NM ScheduleTasks] policy decision:";
for (const auto &task_client_pair : policy_decision) {
TaskID task_id = task_client_pair.first;
ClientID node_id = task_client_pair.second;
NodeID node_id = task_client_pair.second;
RAY_LOG(DEBUG) << task_id << " --> " << node_id;
}
#endif
// Extract decision for this raylet.
std::unordered_set<TaskID> local_task_ids;
// Iterate over (taskid, clientid) pairs, extract tasks assigned to the local node.
// Iterate over (taskid, nodeid) pairs, extract tasks assigned to the local node.
for (const auto &task_client_pair : policy_decision) {
const TaskID &task_id = task_client_pair.first;
const ClientID &node_id = task_client_pair.second;
const NodeID &node_id = task_client_pair.second;
if (node_id == self_node_id_) {
local_task_ids.insert(task_id);
} else {
@@ -2300,7 +2299,7 @@ void NodeManager::TreatTaskAsFailedIfLost(const Task &task) {
object_id, spec.CallerAddress(),
[this, task_marked_as_failed, task](
const ray::ObjectID &object_id,
const std::unordered_set<ray::ClientID> &clients) {
const std::unordered_set<ray::NodeID> &clients) {
if (!*task_marked_as_failed) {
// Only process the object locations if we haven't already marked the
// task as failed.
@@ -2604,7 +2603,7 @@ void NodeManager::AssignTask(const std::shared_ptr<WorkerInterface> &worker,
worker->SetPlacementGroupId(spec.PlacementGroupId());
const auto owner_worker_id = WorkerID::FromBinary(spec.CallerAddress().worker_id());
const auto owner_node_id = ClientID::FromBinary(spec.CallerAddress().raylet_id());
const auto owner_node_id = NodeID::FromBinary(spec.CallerAddress().raylet_id());
RAY_CHECK(!owner_worker_id.IsNil());
RAY_LOG(DEBUG) << "Worker lease request DISPATCH " << task_id << " to worker "
<< worker->WorkerId() << ", owner ID " << owner_worker_id;
@@ -2935,8 +2934,7 @@ void NodeManager::HandleObjectMissing(const ObjectID &object_id) {
}
}
void NodeManager::ForwardTaskOrResubmit(const Task &task,
const ClientID &node_manager_id) {
void NodeManager::ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id) {
// Attempt to forward the task.
// TODO(sang): Modify method names.
ForwardTask(task, node_manager_id,
@@ -2957,7 +2955,7 @@ void NodeManager::ForwardTaskOrResubmit(const Task &task,
}
void NodeManager::ForwardTask(
const Task &task, const ClientID &node_id,
const Task &task, const NodeID &node_id,
const std::function<void(const ray::Status &, const Task &)> &on_error) {
// This method spillbacks lease requests to other nodes.
// TODO(sang): Modify method names.
+13 -13
View File
@@ -127,7 +127,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
///
/// \param resource_config The initial set of node resources.
/// \param object_manager A reference to the local object manager.
NodeManager(boost::asio::io_service &io_service, const ClientID &self_node_id,
NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id,
const NodeManagerConfig &config, ObjectManager &object_manager,
std::shared_ptr<gcs::GcsClient> gcs_client,
std::shared_ptr<ObjectDirectoryInterface> object_directory_);
@@ -191,14 +191,14 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// \param client_id ID of the node that created or updated resources.
/// \param createUpdatedResources Created or updated resources.
/// \return Void.
void ResourceCreateUpdated(const ClientID &client_id,
void ResourceCreateUpdated(const NodeID &client_id,
const ResourceSet &createUpdatedResources);
/// Handler for the deletion of a resource in the GCS
/// \param client_id ID of the node that deleted resources.
/// \param resource_names Names of deleted resources.
/// \return Void.
void ResourceDeleted(const ClientID &client_id,
void ResourceDeleted(const NodeID &client_id,
const std::vector<std::string> &resource_names);
/// Evaluates the local infeasible queue to check if any tasks can be scheduled.
@@ -226,7 +226,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// \param id The ID of the node manager that sent the heartbeat.
/// \param data The heartbeat data including load information.
/// \return Void.
void HeartbeatAdded(const ClientID &id, const HeartbeatTableData &data);
void HeartbeatAdded(const NodeID &id, const HeartbeatTableData &data);
/// Handler for a heartbeat batch notification from the GCS
///
/// \param heartbeat_batch The batch of heartbeat data.
@@ -308,7 +308,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// consider the local node manager and the node managers in the keys of the
/// resource_map argument.
/// \return Void.
void ScheduleTasks(std::unordered_map<ClientID, SchedulingResources> &resource_map);
void ScheduleTasks(std::unordered_map<NodeID, SchedulingResources> &resource_map);
/// Make a placement decision for the resource_map and subtract original resources so
/// that the node is ready to commit (create) placement group resources.
@@ -319,7 +319,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// resource_map argument.
/// \param bundle_spec Specification of bundle that will be prepared.
/// \return True if resources were successfully prepared. False otherwise.
bool PrepareBundle(std::unordered_map<ClientID, SchedulingResources> &resource_map,
bool PrepareBundle(std::unordered_map<NodeID, SchedulingResources> &resource_map,
const BundleSpecification &bundle_spec);
/// Make a placement decision for the resource_map.
@@ -329,7 +329,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// consider the local node manager and the node managers in the keys of the
/// resource_map argument.
/// \param bundle_spec Specification of bundle that will be prepared.
void CommitBundle(std::unordered_map<ClientID, SchedulingResources> &resource_map,
void CommitBundle(std::unordered_map<NodeID, SchedulingResources> &resource_map,
const BundleSpecification &bundle_spec);
/// Handle a task whose return value(s) must be reconstructed.
@@ -346,7 +346,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// \param task The task in question.
/// \param node_manager_id The ID of the remote node manager.
/// \return Void.
void ForwardTaskOrResubmit(const Task &task, const ClientID &node_manager_id);
void ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id);
/// Forward a task to another node to execute. The task is assumed to not be
/// queued in local_queues_.
///
@@ -354,7 +354,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// \param node_id The ID of the node to forward the task to.
/// \param on_error Callback to run on non-ok status.
void ForwardTask(
const Task &task, const ClientID &node_id,
const Task &task, const NodeID &node_id,
const std::function<void(const ray::Status &, const Task &)> &on_error);
/// Dispatch locally scheduled tasks. This attempts the transition from "scheduled" to
@@ -681,7 +681,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
bool IsActorCreationTask(const TaskID &task_id);
/// ID of this node.
ClientID self_node_id_;
NodeID self_node_id_;
boost::asio::io_service &io_service_;
ObjectManager &object_manager_;
/// A Plasma object store client. This is used for creating new objects in
@@ -733,7 +733,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
const NodeManagerConfig initial_config_;
/// The resources (and specific resource IDs) that are currently available.
ResourceIdSet local_available_resources_;
std::unordered_map<ClientID, SchedulingResources> cluster_resource_map_;
std::unordered_map<NodeID, SchedulingResources> cluster_resource_map_;
/// A pool of workers.
WorkerPool worker_pool_;
@@ -772,7 +772,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
rpc::ClientCallManager client_call_manager_;
/// Map from node ids to clients of the remote node managers.
std::unordered_map<ClientID, std::unique_ptr<rpc::NodeManagerClient>>
std::unordered_map<NodeID, std::unique_ptr<rpc::NodeManagerClient>>
remote_node_manager_clients_;
/// Map of workers leased out to direct call clients.
@@ -813,7 +813,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
/// Cache for the WorkerTable in the GCS.
absl::flat_hash_set<WorkerID> failed_workers_cache_;
/// Cache for the ClientTable in the GCS.
absl::flat_hash_set<ClientID> failed_nodes_cache_;
absl::flat_hash_set<NodeID> failed_nodes_cache_;
/// Concurrency for the following map
mutable absl::Mutex plasma_object_notification_lock_;
@@ -128,14 +128,14 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
int num_connected_clients = 0;
ClientID node_id_1;
ClientID node_id_2;
NodeID node_id_1;
NodeID node_id_2;
void WaitConnections() {
node_id_1 = gcs_client_1->Nodes().GetSelfId();
node_id_2 = gcs_client_2->Nodes().GetSelfId();
gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
[this](const ClientID &node_id, const rpc::GcsNodeInfo &data) {
[this](const NodeID &node_id, const rpc::GcsNodeInfo &data) {
if (node_id == node_id_1 || node_id == node_id_2) {
num_connected_clients += 1;
}
@@ -196,8 +196,8 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
RAY_LOG(INFO) << "\n"
<< "Server client ids:"
<< "\n";
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
RAY_LOG(INFO) << "Server 1: " << node_id_1;
RAY_LOG(INFO) << "Server 2: " << node_id_2;
@@ -205,13 +205,13 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
<< "All connected clients:"
<< "\n";
auto data = gcs_client_2->Nodes().Get(node_id_1);
RAY_LOG(INFO) << (ClientID::FromBinary(data->node_id()).IsNil());
RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data->node_id());
RAY_LOG(INFO) << (NodeID::FromBinary(data->node_id()).IsNil());
RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data->node_id());
RAY_LOG(INFO) << "ClientIp=" << data->node_manager_address();
RAY_LOG(INFO) << "ClientPort=" << data->node_manager_port();
rpc::GcsNodeInfo data2;
gcs_client_1->Nodes().Get(node_id_2);
RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data2->node_id());
RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data2->node_id());
RAY_LOG(INFO) << "ClientIp=" << data2->node_manager_address();
RAY_LOG(INFO) << "ClientPort=" << data2->node_manager_port();
}
+1 -1
View File
@@ -60,7 +60,7 @@ Raylet::Raylet(boost::asio::io_service &main_service, const std::string &socket_
const NodeManagerConfig &node_manager_config,
const ObjectManagerConfig &object_manager_config,
std::shared_ptr<gcs::GcsClient> gcs_client, int metrics_export_port)
: self_node_id_(ClientID::FromRandom()),
: self_node_id_(NodeID::FromRandom()),
gcs_client_(gcs_client),
object_directory_(
RayConfig::instance().ownership_based_object_directory_enabled()
+1 -1
View File
@@ -77,7 +77,7 @@ class Raylet {
friend class TestObjectManagerIntegration;
/// ID of this node.
ClientID self_node_id_;
NodeID self_node_id_;
/// Information of this node.
GcsNodeInfo self_node_info_;

Some files were not shown because too many files have changed in this diff Show More