mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 08:29:54 +08:00
[Refactor] Rename ClientId to NodeId (#10992)
* rename ClientId to NodeId * format lint * format lint * fix conflicts * rename new ClientId to NodeId * update lint * make same version of clang-format with travis ci
This commit is contained in:
@@ -66,7 +66,7 @@ from ray._raylet import (
|
||||
ActorCheckpointID,
|
||||
ActorClassID,
|
||||
ActorID,
|
||||
ClientID,
|
||||
NodeID,
|
||||
Config as _Config,
|
||||
JobID,
|
||||
WorkerID,
|
||||
@@ -151,7 +151,7 @@ __all__ += [
|
||||
"ActorCheckpointID",
|
||||
"ActorClassID",
|
||||
"ActorID",
|
||||
"ClientID",
|
||||
"NodeID",
|
||||
"JobID",
|
||||
"WorkerID",
|
||||
"FunctionID",
|
||||
|
||||
@@ -76,7 +76,7 @@ from ray.includes.unique_ids cimport (
|
||||
CActorID,
|
||||
CActorCheckpointID,
|
||||
CObjectID,
|
||||
CClientID,
|
||||
CNodeID,
|
||||
CPlacementGroupID,
|
||||
)
|
||||
from ray.includes.libcoreworker cimport (
|
||||
@@ -784,7 +784,7 @@ cdef class CoreWorker:
|
||||
CCoreWorkerProcess.GetCoreWorker().GetCurrentJobId().Binary())
|
||||
|
||||
def get_current_node_id(self):
|
||||
return ClientID(
|
||||
return NodeID(
|
||||
CCoreWorkerProcess.GetCoreWorker().GetCurrentNodeId().Binary())
|
||||
|
||||
def get_actor_id(self):
|
||||
@@ -1479,10 +1479,10 @@ cdef class CoreWorker:
|
||||
actor_id.native(), checkpoint_id.native()))
|
||||
|
||||
def set_resource(self, basestring resource_name,
|
||||
double capacity, ClientID client_id):
|
||||
double capacity, NodeID client_id):
|
||||
CCoreWorkerProcess.GetCoreWorker().SetResource(
|
||||
resource_name.encode("ascii"), capacity,
|
||||
CClientID.FromBinary(client_id.binary()))
|
||||
CNodeID.FromBinary(client_id.binary()))
|
||||
|
||||
def force_spill_objects(self, object_refs):
|
||||
cdef c_vector[CObjectID] object_ids
|
||||
|
||||
@@ -4,17 +4,17 @@ import ray
|
||||
def set_resource(resource_name, capacity, client_id=None):
|
||||
""" Set a resource to a specified capacity.
|
||||
|
||||
This creates, updates or deletes a custom resource for a target clientId.
|
||||
This creates, updates or deletes a custom resource for a target NodeID.
|
||||
If the resource already exists, its capacity is updated to the new value.
|
||||
If the capacity is set to 0, the resource is deleted.
|
||||
If ClientID is not specified or set to None,
|
||||
If NodeID is not specified or set to None,
|
||||
the resource is created on the local client where the actor is running.
|
||||
|
||||
Args:
|
||||
resource_name (str): Name of the resource to be created
|
||||
capacity (int): Capacity of the new resource. Resource is deleted if
|
||||
capacity is 0.
|
||||
client_id (str): The ClientId of the node where the resource is to be
|
||||
client_id (str): The NodeID of the node where the resource is to be
|
||||
set.
|
||||
|
||||
Returns:
|
||||
@@ -25,9 +25,9 @@ def set_resource(resource_name, capacity, client_id=None):
|
||||
specified.
|
||||
"""
|
||||
if client_id is not None:
|
||||
client_id_obj = ray.ClientID(ray.utils.hex_to_binary(client_id))
|
||||
client_id_obj = ray.NodeID(ray.utils.hex_to_binary(client_id))
|
||||
else:
|
||||
client_id_obj = ray.ClientID.nil()
|
||||
client_id_obj = ray.NodeID.nil()
|
||||
if (capacity < 0) or (capacity != int(capacity)):
|
||||
raise ValueError(
|
||||
"Capacity {} must be a non-negative integer.".format(capacity))
|
||||
|
||||
@@ -4,7 +4,7 @@ from libcpp.vector cimport vector as c_vector
|
||||
from libcpp.memory cimport unique_ptr
|
||||
from ray.includes.unique_ids cimport (
|
||||
CActorID,
|
||||
CClientID,
|
||||
CNodeID,
|
||||
CObjectID,
|
||||
CWorkerID,
|
||||
CPlacementGroupID,
|
||||
@@ -24,7 +24,7 @@ cdef extern from "ray/gcs/gcs_client/global_state_accessor.h" nogil:
|
||||
unique_ptr[c_string] GetObjectInfo(const CObjectID &object_id)
|
||||
c_vector[c_string] GetAllActorInfo()
|
||||
unique_ptr[c_string] GetActorInfo(const CActorID &actor_id)
|
||||
c_string GetNodeResourceInfo(const CClientID &node_id)
|
||||
c_string GetNodeResourceInfo(const CNodeID &node_id)
|
||||
unique_ptr[c_string] GetWorkerInfo(const CWorkerID &worker_id)
|
||||
c_vector[c_string] GetAllWorkerInfo()
|
||||
c_bool AddWorkerInfo(const c_string &serialized_string)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from ray.includes.unique_ids cimport (
|
||||
CActorID,
|
||||
CClientID,
|
||||
CNodeID,
|
||||
CObjectID,
|
||||
CWorkerID,
|
||||
CPlacementGroupID
|
||||
@@ -89,7 +89,7 @@ cdef class GlobalStateAccessor:
|
||||
|
||||
def get_node_resource_info(self, node_id):
|
||||
cdef c_string result
|
||||
cdef CClientID cnode_id = CClientID.FromBinary(node_id.binary())
|
||||
cdef CNodeID cnode_id = CNodeID.FromBinary(node_id.binary())
|
||||
with nogil:
|
||||
result = self.inner.get().GetNodeResourceInfo(cnode_id)
|
||||
return result
|
||||
|
||||
@@ -14,7 +14,7 @@ from libcpp.vector cimport vector as c_vector
|
||||
from ray.includes.unique_ids cimport (
|
||||
CActorID,
|
||||
CActorCheckpointID,
|
||||
CClientID,
|
||||
CNodeID,
|
||||
CJobID,
|
||||
CTaskID,
|
||||
CObjectID,
|
||||
@@ -121,7 +121,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
|
||||
|
||||
CJobID GetCurrentJobId()
|
||||
CTaskID GetCurrentTaskId()
|
||||
CClientID GetCurrentNodeId()
|
||||
CNodeID GetCurrentNodeId()
|
||||
CPlacementGroupID GetCurrentPlacementGroupId()
|
||||
const CActorID &GetActorId()
|
||||
void SetActorTitle(const c_string &title)
|
||||
@@ -196,7 +196,7 @@ cdef extern from "ray/core_worker/core_worker.h" nogil:
|
||||
const CActorID &actor_id, const CActorCheckpointID &checkpoint_id)
|
||||
CRayStatus SetResource(const c_string &resource_name,
|
||||
const double capacity,
|
||||
const CClientID &client_Id)
|
||||
const CNodeID &client_Id)
|
||||
CRayStatus SpillObjects(const c_vector[CObjectID] &object_ids)
|
||||
CRayStatus ForceRestoreSpilledObjects(
|
||||
const c_vector[CObjectID] &object_ids)
|
||||
|
||||
@@ -65,10 +65,10 @@ cdef extern from "ray/common/id.h" namespace "ray" nogil:
|
||||
CActorID Of(CJobID job_id, CTaskID parent_task_id,
|
||||
int64_t parent_task_counter)
|
||||
|
||||
cdef cppclass CClientID "ray::ClientID"(CUniqueID):
|
||||
cdef cppclass CNodeID "ray::NodeID"(CUniqueID):
|
||||
|
||||
@staticmethod
|
||||
CClientID FromBinary(const c_string &binary)
|
||||
CNodeID FromBinary(const c_string &binary)
|
||||
|
||||
cdef cppclass CConfigID "ray::ConfigID"(CUniqueID):
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ from ray.includes.unique_ids cimport (
|
||||
CActorCheckpointID,
|
||||
CActorClassID,
|
||||
CActorID,
|
||||
CClientID,
|
||||
CNodeID,
|
||||
CConfigID,
|
||||
CJobID,
|
||||
CFunctionID,
|
||||
@@ -199,14 +199,14 @@ cdef class TaskID(BaseID):
|
||||
CTaskID.FromBinary(parent_task_id.binary()),
|
||||
parent_task_counter).Binary())
|
||||
|
||||
cdef class ClientID(UniqueID):
|
||||
cdef class NodeID(UniqueID):
|
||||
|
||||
def __init__(self, id):
|
||||
check_id(id)
|
||||
self.data = CClientID.FromBinary(<c_string>id)
|
||||
self.data = CNodeID.FromBinary(<c_string>id)
|
||||
|
||||
cdef CClientID native(self):
|
||||
return <CClientID>self.data
|
||||
cdef CNodeID native(self):
|
||||
return <CNodeID>self.data
|
||||
|
||||
|
||||
cdef class JobID(BaseID):
|
||||
@@ -373,7 +373,7 @@ _ID_TYPES = [
|
||||
ActorCheckpointID,
|
||||
ActorClassID,
|
||||
ActorID,
|
||||
ClientID,
|
||||
NodeID,
|
||||
JobID,
|
||||
WorkerID,
|
||||
FunctionID,
|
||||
|
||||
+1
-1
@@ -266,7 +266,7 @@ class GlobalState:
|
||||
"""
|
||||
self._check_connected()
|
||||
|
||||
node_id = ray.ClientID(hex_to_binary(node_id))
|
||||
node_id = ray.NodeID(hex_to_binary(node_id))
|
||||
node_resource_bytes = \
|
||||
self.global_state_accessor.get_node_resource_info(node_id)
|
||||
if node_resource_bytes is None:
|
||||
|
||||
@@ -23,4 +23,4 @@ DEFINE_UNIQUE_ID(ActorClassID)
|
||||
DEFINE_UNIQUE_ID(ActorCheckpointID)
|
||||
DEFINE_UNIQUE_ID(WorkerID)
|
||||
DEFINE_UNIQUE_ID(ConfigID)
|
||||
DEFINE_UNIQUE_ID(ClientID)
|
||||
DEFINE_UNIQUE_ID(NodeID)
|
||||
|
||||
@@ -13,7 +13,7 @@ typedef std::function<void(const std::shared_ptr<void>, const std::string &, int
|
||||
DispatchTaskCallback;
|
||||
/// Arguments are the raylet ID to spill back to, the raylet's
|
||||
/// address and the raylet's port.
|
||||
typedef std::function<void(const ClientID &, const std::string &, int)>
|
||||
typedef std::function<void(const NodeID &, const std::string &, int)>
|
||||
SpillbackTaskCallback;
|
||||
|
||||
typedef std::function<void()> CancelTaskCallback;
|
||||
|
||||
@@ -129,8 +129,7 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
|
||||
<< ", ip address: " << actor_data.address().ip_address()
|
||||
<< ", port: " << actor_data.address().port() << ", worker_id: "
|
||||
<< WorkerID::FromBinary(actor_data.address().worker_id())
|
||||
<< ", raylet_id: "
|
||||
<< ClientID::FromBinary(actor_data.address().raylet_id())
|
||||
<< ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id())
|
||||
<< ", num_restarts: " << actor_data.num_restarts();
|
||||
if (actor_data.state() == gcs::ActorTableData::RESTARTING) {
|
||||
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false);
|
||||
|
||||
@@ -302,7 +302,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
|
||||
auto grpc_client = rpc::NodeManagerWorkerClient::make(
|
||||
options_.raylet_ip_address, options_.node_manager_port, *client_call_manager_);
|
||||
Status raylet_client_status;
|
||||
ClientID local_raylet_id;
|
||||
NodeID local_raylet_id;
|
||||
int assigned_port;
|
||||
std::unordered_map<std::string, std::string> system_config;
|
||||
local_raylet_client_ = std::shared_ptr<raylet::RayletClient>(new raylet::RayletClient(
|
||||
@@ -352,7 +352,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
|
||||
RegisterToGcs();
|
||||
|
||||
// Register a callback to monitor removed nodes.
|
||||
auto on_node_change = [this](const ClientID &node_id, const rpc::GcsNodeInfo &data) {
|
||||
auto on_node_change = [this](const NodeID &node_id, const rpc::GcsNodeInfo &data) {
|
||||
if (data.state() == rpc::GcsNodeInfo::DEAD) {
|
||||
OnNodeRemoved(data);
|
||||
}
|
||||
@@ -395,7 +395,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
|
||||
options_.ref_counting_enabled ? reference_counter_ : nullptr, local_raylet_client_,
|
||||
options_.check_signals));
|
||||
|
||||
auto check_node_alive_fn = [this](const ClientID &node_id) {
|
||||
auto check_node_alive_fn = [this](const NodeID &node_id) {
|
||||
auto node = gcs_client_->Nodes().Get(node_id);
|
||||
if (!node) {
|
||||
return false;
|
||||
@@ -500,7 +500,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
|
||||
RAY_CHECK_OK(status);
|
||||
std::vector<rpc::Address> locations;
|
||||
for (const auto &result : results) {
|
||||
const auto &node_id = ClientID::FromBinary(result.manager());
|
||||
const auto &node_id = NodeID::FromBinary(result.manager());
|
||||
auto node = gcs_client_->Nodes().Get(node_id);
|
||||
RAY_CHECK(node.has_value());
|
||||
if (node->state() == rpc::GcsNodeInfo::ALIVE) {
|
||||
@@ -617,7 +617,7 @@ void CoreWorker::RunIOService() {
|
||||
}
|
||||
|
||||
void CoreWorker::OnNodeRemoved(const rpc::GcsNodeInfo &node_info) {
|
||||
const auto node_id = ClientID::FromBinary(node_info.node_id());
|
||||
const auto node_id = NodeID::FromBinary(node_info.node_id());
|
||||
RAY_LOG(INFO) << "Node failure " << node_id;
|
||||
const auto lost_objects = reference_counter_->ResetObjectsOnRemovedNode(node_id);
|
||||
// Delete the objects from the in-memory store to indicate that they are not
|
||||
@@ -825,7 +825,7 @@ Status CoreWorker::Put(const RayObject &object,
|
||||
worker_context_.GetNextPutIndex());
|
||||
reference_counter_->AddOwnedObject(
|
||||
*object_id, contained_object_ids, rpc_address_, CurrentCallSite(), object.GetSize(),
|
||||
/*is_reconstructable=*/false, ClientID::FromBinary(rpc_address_.raylet_id()));
|
||||
/*is_reconstructable=*/false, NodeID::FromBinary(rpc_address_.raylet_id()));
|
||||
return Put(object, contained_object_ids, *object_id, /*pin_object=*/true);
|
||||
}
|
||||
|
||||
@@ -884,7 +884,7 @@ Status CoreWorker::Create(const std::shared_ptr<Buffer> &metadata, const size_t
|
||||
reference_counter_->AddOwnedObject(*object_id, contained_object_ids, rpc_address_,
|
||||
CurrentCallSite(), data_size + metadata->Size(),
|
||||
/*is_reconstructable=*/false,
|
||||
ClientID::FromBinary(rpc_address_.raylet_id()));
|
||||
NodeID::FromBinary(rpc_address_.raylet_id()));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@@ -1153,7 +1153,7 @@ Status CoreWorker::NotifyActorResumedFromCheckpoint(
|
||||
}
|
||||
|
||||
Status CoreWorker::SetResource(const std::string &resource_name, const double capacity,
|
||||
const ClientID &client_id) {
|
||||
const NodeID &client_id) {
|
||||
return local_raylet_client_->SetResource(resource_name, capacity, client_id);
|
||||
}
|
||||
|
||||
@@ -1167,7 +1167,7 @@ void CoreWorker::SpillOwnedObject(const ObjectID &object_id,
|
||||
}
|
||||
|
||||
// Find the raylet that hosts the primary copy of the object.
|
||||
ClientID pinned_at;
|
||||
NodeID pinned_at;
|
||||
RAY_CHECK(reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at));
|
||||
auto node = gcs_client_->Nodes().Get(pinned_at);
|
||||
if (pinned_at.IsNil() || !node) {
|
||||
@@ -2023,7 +2023,7 @@ void CoreWorker::HandleAddObjectLocationOwner(
|
||||
return;
|
||||
}
|
||||
reference_counter_->AddObjectLocation(ObjectID::FromBinary(request.object_id()),
|
||||
ClientID::FromBinary(request.client_id()));
|
||||
NodeID::FromBinary(request.client_id()));
|
||||
send_reply_callback(Status::OK(), nullptr, nullptr);
|
||||
}
|
||||
|
||||
@@ -2036,7 +2036,7 @@ void CoreWorker::HandleRemoveObjectLocationOwner(
|
||||
return;
|
||||
}
|
||||
reference_counter_->RemoveObjectLocation(ObjectID::FromBinary(request.object_id()),
|
||||
ClientID::FromBinary(request.client_id()));
|
||||
NodeID::FromBinary(request.client_id()));
|
||||
send_reply_callback(Status::OK(), nullptr, nullptr);
|
||||
}
|
||||
|
||||
@@ -2048,7 +2048,7 @@ void CoreWorker::HandleGetObjectLocationsOwner(
|
||||
send_reply_callback)) {
|
||||
return;
|
||||
}
|
||||
std::unordered_set<ClientID> client_ids =
|
||||
std::unordered_set<NodeID> client_ids =
|
||||
reference_counter_->GetObjectLocations(ObjectID::FromBinary(request.object_id()));
|
||||
for (const auto &client_id : client_ids) {
|
||||
reply->add_client_ids(client_id.Binary());
|
||||
|
||||
@@ -350,9 +350,7 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
|
||||
|
||||
const JobID &GetCurrentJobId() const { return worker_context_.GetCurrentJobID(); }
|
||||
|
||||
ClientID GetCurrentNodeId() const {
|
||||
return ClientID::FromBinary(rpc_address_.raylet_id());
|
||||
}
|
||||
NodeID GetCurrentNodeId() const { return NodeID::FromBinary(rpc_address_.raylet_id()); }
|
||||
|
||||
const PlacementGroupID &GetCurrentPlacementGroupId() const {
|
||||
return worker_context_.GetCurrentPlacementGroupId();
|
||||
@@ -617,10 +615,10 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
|
||||
/// Sets a resource with the specified capacity and client id
|
||||
/// \param[in] resource_name Name of the resource to be set.
|
||||
/// \param[in] capacity Capacity of the resource.
|
||||
/// \param[in] client_Id ClientID where the resource is to be set.
|
||||
/// \param[in] client_id NodeID where the resource is to be set.
|
||||
/// \return Status
|
||||
Status SetResource(const std::string &resource_name, const double capacity,
|
||||
const ClientID &client_id);
|
||||
const NodeID &client_id);
|
||||
|
||||
/// Request an object to be spilled to external storage.
|
||||
/// \param[in] object_ids The objects to be spilled.
|
||||
|
||||
@@ -252,7 +252,7 @@ JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeShutdown(JNIEn
|
||||
|
||||
JNIEXPORT void JNICALL Java_io_ray_runtime_RayNativeRuntime_nativeSetResource(
|
||||
JNIEnv *env, jclass, jstring resourceName, jdouble capacity, jbyteArray nodeId) {
|
||||
const auto node_id = JavaByteArrayToId<ClientID>(env, nodeId);
|
||||
const auto node_id = JavaByteArrayToId<NodeID>(env, nodeId);
|
||||
const char *native_resource_name = env->GetStringUTFChars(resourceName, JNI_FALSE);
|
||||
|
||||
auto status = ray::CoreWorkerProcess::GetCoreWorker().SetResource(
|
||||
|
||||
@@ -76,7 +76,7 @@ Java_io_ray_runtime_gcs_GlobalStateAccessor_nativeGetNodeResourceInfo(
|
||||
JNIEnv *env, jobject o, jlong gcs_accessor_ptr, jbyteArray node_id_bytes) {
|
||||
auto *gcs_accessor =
|
||||
reinterpret_cast<ray::gcs::GlobalStateAccessor *>(gcs_accessor_ptr);
|
||||
auto node_id = JavaByteArrayToId<ray::ClientID>(env, node_id_bytes);
|
||||
auto node_id = JavaByteArrayToId<ray::NodeID>(env, node_id_bytes);
|
||||
auto node_resource_info = gcs_accessor->GetNodeResourceInfo(node_id);
|
||||
return static_cast<jbyteArray>(NativeStringToJavaByteArray(env, node_resource_info));
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
using ray::ClientID;
|
||||
using ray::NodeID;
|
||||
|
||||
JNIEXPORT jbyteArray JNICALL
|
||||
Java_io_ray_runtime_task_NativeTaskExecutor_nativePrepareCheckpoint(JNIEnv *env, jclass) {
|
||||
|
||||
@@ -20,7 +20,7 @@ namespace ray {
|
||||
|
||||
Status ObjectRecoveryManager::RecoverObject(const ObjectID &object_id) {
|
||||
// Check the ReferenceCounter to see if there is a location for the object.
|
||||
ClientID pinned_at;
|
||||
NodeID pinned_at;
|
||||
bool owned_by_us = reference_counter_->IsPlasmaObjectPinned(object_id, &pinned_at);
|
||||
if (!owned_by_us) {
|
||||
return Status::Invalid(
|
||||
@@ -80,12 +80,12 @@ void ObjectRecoveryManager::PinExistingObjectCopy(
|
||||
const std::vector<rpc::Address> &other_locations) {
|
||||
// If a copy still exists, pin the object by sending a
|
||||
// PinObjectIDs RPC.
|
||||
const auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
|
||||
const auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
|
||||
RAY_LOG(DEBUG) << "Trying to pin copy of lost object " << object_id << " at node "
|
||||
<< node_id;
|
||||
|
||||
std::shared_ptr<PinObjectsInterface> client;
|
||||
if (node_id == ClientID::FromBinary(rpc_address_.raylet_id())) {
|
||||
if (node_id == NodeID::FromBinary(rpc_address_.raylet_id())) {
|
||||
client = local_object_pinning_client_;
|
||||
} else {
|
||||
absl::MutexLock lock(&mu_);
|
||||
|
||||
@@ -137,7 +137,7 @@ class ObjectRecoveryManager {
|
||||
mutable absl::Mutex mu_;
|
||||
|
||||
/// Cache of gRPC clients to remote raylets for pinning objects.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<PinObjectsInterface>>
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<PinObjectsInterface>>
|
||||
remote_object_pinning_clients_ GUARDED_BY(mu_);
|
||||
|
||||
/// Objects that are currently pending recovery. Calls to RecoverObject for
|
||||
|
||||
@@ -144,11 +144,12 @@ void ReferenceCounter::AddObjectRefStats(
|
||||
}
|
||||
}
|
||||
|
||||
void ReferenceCounter::AddOwnedObject(
|
||||
const ObjectID &object_id, const std::vector<ObjectID> &inner_ids,
|
||||
const rpc::Address &owner_address, const std::string &call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<ClientID> &pinned_at_raylet_id) {
|
||||
void ReferenceCounter::AddOwnedObject(const ObjectID &object_id,
|
||||
const std::vector<ObjectID> &inner_ids,
|
||||
const rpc::Address &owner_address,
|
||||
const std::string &call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<NodeID> &pinned_at_raylet_id) {
|
||||
RAY_LOG(DEBUG) << "Adding owned object " << object_id;
|
||||
absl::MutexLock lock(&mutex_);
|
||||
RAY_CHECK(object_id_refs_.count(object_id) == 0)
|
||||
@@ -504,12 +505,12 @@ bool ReferenceCounter::SetDeleteCallback(
|
||||
}
|
||||
|
||||
std::vector<ObjectID> ReferenceCounter::ResetObjectsOnRemovedNode(
|
||||
const ClientID &raylet_id) {
|
||||
const NodeID &raylet_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
std::vector<ObjectID> lost_objects;
|
||||
for (auto it = object_id_refs_.begin(); it != object_id_refs_.end(); it++) {
|
||||
const auto &object_id = it->first;
|
||||
if (it->second.pinned_at_raylet_id.value_or(ClientID::Nil()) == raylet_id) {
|
||||
if (it->second.pinned_at_raylet_id.value_or(NodeID::Nil()) == raylet_id) {
|
||||
lost_objects.push_back(object_id);
|
||||
ReleasePlasmaObject(it);
|
||||
}
|
||||
@@ -518,7 +519,7 @@ std::vector<ObjectID> ReferenceCounter::ResetObjectsOnRemovedNode(
|
||||
}
|
||||
|
||||
void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id,
|
||||
const ClientID &raylet_id) {
|
||||
const NodeID &raylet_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
auto it = object_id_refs_.find(object_id);
|
||||
if (it != object_id_refs_.end()) {
|
||||
@@ -539,12 +540,12 @@ void ReferenceCounter::UpdateObjectPinnedAtRaylet(const ObjectID &object_id,
|
||||
}
|
||||
|
||||
bool ReferenceCounter::IsPlasmaObjectPinned(const ObjectID &object_id,
|
||||
ClientID *pinned_at) const {
|
||||
NodeID *pinned_at) const {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
auto it = object_id_refs_.find(object_id);
|
||||
if (it != object_id_refs_.end()) {
|
||||
if (it->second.owned_by_us) {
|
||||
*pinned_at = it->second.pinned_at_raylet_id.value_or(ClientID::Nil());
|
||||
*pinned_at = it->second.pinned_at_raylet_id.value_or(NodeID::Nil());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -890,29 +891,29 @@ void ReferenceCounter::SetReleaseLineageCallback(
|
||||
}
|
||||
|
||||
void ReferenceCounter::AddObjectLocation(const ObjectID &object_id,
|
||||
const ClientID &node_id) {
|
||||
const NodeID &node_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
auto it = object_id_locations_.find(object_id);
|
||||
if (it == object_id_locations_.end()) {
|
||||
it = object_id_locations_.emplace(object_id, absl::flat_hash_set<ClientID>()).first;
|
||||
it = object_id_locations_.emplace(object_id, absl::flat_hash_set<NodeID>()).first;
|
||||
}
|
||||
it->second.insert(node_id);
|
||||
}
|
||||
|
||||
void ReferenceCounter::RemoveObjectLocation(const ObjectID &object_id,
|
||||
const ClientID &node_id) {
|
||||
const NodeID &node_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
auto it = object_id_locations_.find(object_id);
|
||||
RAY_CHECK(it != object_id_locations_.end());
|
||||
it->second.erase(node_id);
|
||||
}
|
||||
|
||||
std::unordered_set<ClientID> ReferenceCounter::GetObjectLocations(
|
||||
std::unordered_set<NodeID> ReferenceCounter::GetObjectLocations(
|
||||
const ObjectID &object_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
auto it = object_id_locations_.find(object_id);
|
||||
RAY_CHECK(it != object_id_locations_.end());
|
||||
std::unordered_set<ClientID> locations;
|
||||
std::unordered_set<NodeID> locations;
|
||||
for (const auto &location : it->second) {
|
||||
locations.insert(location);
|
||||
}
|
||||
|
||||
@@ -36,13 +36,11 @@ class ReferenceCounterInterface {
|
||||
const std::string &call_site) = 0;
|
||||
virtual bool AddBorrowedObject(const ObjectID &object_id, const ObjectID &outer_id,
|
||||
const rpc::Address &owner_address) = 0;
|
||||
virtual void AddOwnedObject(const ObjectID &object_id,
|
||||
const std::vector<ObjectID> &contained_ids,
|
||||
const rpc::Address &owner_address,
|
||||
const std::string &call_site, const int64_t object_size,
|
||||
bool is_reconstructable,
|
||||
const absl::optional<ClientID> &pinned_at_raylet_id =
|
||||
absl::optional<ClientID>()) = 0;
|
||||
virtual void AddOwnedObject(
|
||||
const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
|
||||
const rpc::Address &owner_address, const std::string &call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<NodeID> &pinned_at_raylet_id = absl::optional<NodeID>()) = 0;
|
||||
virtual bool SetDeleteCallback(
|
||||
const ObjectID &object_id,
|
||||
const std::function<void(const ObjectID &)> callback) = 0;
|
||||
@@ -169,7 +167,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
|
||||
const rpc::Address &owner_address, const std::string &call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<ClientID> &pinned_at_raylet_id = absl::optional<ClientID>())
|
||||
const absl::optional<NodeID> &pinned_at_raylet_id = absl::optional<NodeID>())
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Update the size of the object.
|
||||
@@ -321,7 +319,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
///
|
||||
/// \param[in] object_id The object to update.
|
||||
/// \param[in] raylet_id The raylet that is now pinning the object ID.
|
||||
void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const ClientID &raylet_id)
|
||||
void UpdateObjectPinnedAtRaylet(const ObjectID &object_id, const NodeID &raylet_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Check whether the object is pinned at a remote plasma store node.
|
||||
@@ -332,7 +330,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
/// \return True if the object exists and is owned by us, false otherwise. We
|
||||
/// return false here because a borrower should not know the pinned location
|
||||
/// for an object.
|
||||
bool IsPlasmaObjectPinned(const ObjectID &object_id, ClientID *pinned_at) const
|
||||
bool IsPlasmaObjectPinned(const ObjectID &object_id, NodeID *pinned_at) const
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Get and reset the objects that were pinned on the given node. This
|
||||
@@ -342,7 +340,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
///
|
||||
/// \param[in] raylet_id The node whose object store has been removed.
|
||||
/// \return The set of objects that were pinned on the given node.
|
||||
std::vector<ObjectID> ResetObjectsOnRemovedNode(const ClientID &raylet_id);
|
||||
std::vector<ObjectID> ResetObjectsOnRemovedNode(const NodeID &raylet_id);
|
||||
|
||||
/// Whether we have a reference to a particular ObjectID.
|
||||
///
|
||||
@@ -361,21 +359,21 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
///
|
||||
/// \param[in] object_id The object to update.
|
||||
/// \param[in] node_id The node to be added to the location table.
|
||||
void AddObjectLocation(const ObjectID &object_id, const ClientID &node_id)
|
||||
void AddObjectLocation(const ObjectID &object_id, const NodeID &node_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Remove location from the location table of the given object.
|
||||
///
|
||||
/// \param[in] object_id The object to update.
|
||||
/// \param[in] node_id The node to be removed from the location table.
|
||||
void RemoveObjectLocation(const ObjectID &object_id, const ClientID &node_id)
|
||||
void RemoveObjectLocation(const ObjectID &object_id, const NodeID &node_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Get the locations from the location table of the given object.
|
||||
///
|
||||
/// \param[in] object_id The object to get locations for.
|
||||
/// \return The nodes that have the object.
|
||||
std::unordered_set<ClientID> GetObjectLocations(const ObjectID &object_id)
|
||||
std::unordered_set<NodeID> GetObjectLocations(const ObjectID &object_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
private:
|
||||
@@ -387,7 +385,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
/// Constructor for a reference that we created.
|
||||
Reference(const rpc::Address &owner_address, std::string call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<ClientID> &pinned_at_raylet_id)
|
||||
const absl::optional<NodeID> &pinned_at_raylet_id)
|
||||
: call_site(call_site),
|
||||
object_size(object_size),
|
||||
owned_by_us(true),
|
||||
@@ -461,7 +459,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
// If this object is owned by us and stored in plasma, and reference
|
||||
// counting is enabled, then some raylet must be pinning the object value.
|
||||
// This is the address of that raylet.
|
||||
absl::optional<ClientID> pinned_at_raylet_id;
|
||||
absl::optional<NodeID> pinned_at_raylet_id;
|
||||
// Whether this object can be reconstructed via lineage. If false, then the
|
||||
// object's value will be pinned as long as it is referenced by any other
|
||||
// object's lineage.
|
||||
@@ -680,7 +678,7 @@ class ReferenceCounter : public ReferenceCounterInterface {
|
||||
/// Holds all reference counts and dependency information for tracked ObjectIDs.
|
||||
ReferenceTable object_id_refs_ GUARDED_BY(mutex_);
|
||||
|
||||
using LocationTable = absl::flat_hash_map<ObjectID, absl::flat_hash_set<ClientID>>;
|
||||
using LocationTable = absl::flat_hash_map<ObjectID, absl::flat_hash_set<NodeID>>;
|
||||
|
||||
/// Holds the client information for the owned objects. This table is separate from
|
||||
/// the reference table because we add object reference after putting object into the
|
||||
|
||||
@@ -56,7 +56,7 @@ class MockWorkerClient : public rpc::CoreWorkerClientInterface {
|
||||
rpc::Address CreateRandomAddress(const std::string &addr) {
|
||||
rpc::Address address;
|
||||
address.set_ip_address(addr);
|
||||
address.set_raylet_id(ClientID::FromRandom().Binary());
|
||||
address.set_raylet_id(NodeID::FromRandom().Binary());
|
||||
address.set_worker_id(WorkerID::FromRandom().Binary());
|
||||
return address;
|
||||
}
|
||||
@@ -1986,11 +1986,11 @@ TEST_F(ReferenceCountLineageEnabledTest, TestPlasmaLocation) {
|
||||
|
||||
ObjectID borrowed_id = ObjectID::FromRandom();
|
||||
rc->AddLocalReference(borrowed_id, "");
|
||||
ClientID pinned_at;
|
||||
NodeID pinned_at;
|
||||
ASSERT_FALSE(rc->IsPlasmaObjectPinned(borrowed_id, &pinned_at));
|
||||
|
||||
ObjectID id = ObjectID::FromRandom();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true);
|
||||
rc->AddLocalReference(id, "");
|
||||
ASSERT_TRUE(rc->SetDeleteCallback(id, callback));
|
||||
@@ -2023,7 +2023,7 @@ TEST_F(ReferenceCountTest, TestFree) {
|
||||
auto callback = [&](const ObjectID &object_id) { deleted->insert(object_id); };
|
||||
|
||||
ObjectID id = ObjectID::FromRandom();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
|
||||
// Test free before receiving information about where the object is pinned.
|
||||
rc->AddOwnedObject(id, {}, rpc::Address(), "", 0, true);
|
||||
@@ -2034,7 +2034,7 @@ TEST_F(ReferenceCountTest, TestFree) {
|
||||
ASSERT_FALSE(rc->SetDeleteCallback(id, callback));
|
||||
ASSERT_EQ(deleted->count(id), 0);
|
||||
rc->UpdateObjectPinnedAtRaylet(id, node_id);
|
||||
ClientID pinned_at;
|
||||
NodeID pinned_at;
|
||||
ASSERT_TRUE(rc->IsPlasmaObjectPinned(id, &pinned_at));
|
||||
ASSERT_TRUE(pinned_at.IsNil());
|
||||
ASSERT_TRUE(rc->IsPlasmaObjectFreed(id));
|
||||
|
||||
@@ -183,7 +183,7 @@ void TaskManager::CompletePendingTask(const TaskID &task_id,
|
||||
reference_counter_->UpdateObjectSize(object_id, return_object.size());
|
||||
|
||||
if (return_object.in_plasma()) {
|
||||
const auto pinned_at_raylet_id = ClientID::FromBinary(worker_addr.raylet_id());
|
||||
const auto pinned_at_raylet_id = NodeID::FromBinary(worker_addr.raylet_id());
|
||||
if (check_node_alive_(pinned_at_raylet_id)) {
|
||||
reference_counter_->UpdateObjectPinnedAtRaylet(object_id, pinned_at_raylet_id);
|
||||
// Mark it as in plasma with a dummy object.
|
||||
|
||||
@@ -58,7 +58,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
|
||||
TaskManager(std::shared_ptr<CoreWorkerMemoryStore> in_memory_store,
|
||||
std::shared_ptr<ReferenceCounter> reference_counter,
|
||||
RetryTaskCallback retry_task_callback,
|
||||
const std::function<bool(const ClientID &node_id)> &check_node_alive,
|
||||
const std::function<bool(const NodeID &node_id)> &check_node_alive,
|
||||
ReconstructObjectCallback reconstruct_object_callback)
|
||||
: in_memory_store_(in_memory_store),
|
||||
reference_counter_(reference_counter),
|
||||
@@ -238,7 +238,7 @@ class TaskManager : public TaskFinisherInterface, public TaskResubmissionInterfa
|
||||
/// processing a worker's reply to check whether the node that the worker
|
||||
/// was on is still alive. If the node is down, the plasma objects returned by the task
|
||||
/// are marked as failed.
|
||||
const std::function<bool(const ClientID &node_id)> check_node_alive_;
|
||||
const std::function<bool(const NodeID &node_id)> check_node_alive_;
|
||||
/// Called when processing a worker's reply if the node that the worker was
|
||||
/// on died. This should be called to attempt to recover a plasma object
|
||||
/// returned by the task (or store an error if the object is not
|
||||
|
||||
@@ -101,7 +101,7 @@ class MockReferenceCounter : public ReferenceCounterInterface {
|
||||
void(const ObjectID &object_id, const std::vector<ObjectID> &contained_ids,
|
||||
const rpc::Address &owner_address, const std::string &call_site,
|
||||
const int64_t object_size, bool is_reconstructable,
|
||||
const absl::optional<ClientID> &pinned_at_raylet_id));
|
||||
const absl::optional<NodeID> &pinned_at_raylet_id));
|
||||
|
||||
MOCK_METHOD2(SetDeleteCallback,
|
||||
bool(const ObjectID &object_id,
|
||||
|
||||
@@ -120,7 +120,7 @@ class MockRayletClient : public WorkerLeaseInterface {
|
||||
|
||||
// Trigger reply to RequestWorkerLease.
|
||||
bool GrantWorkerLease(const std::string &address, int port,
|
||||
const ClientID &retry_at_raylet_id, bool cancel = false) {
|
||||
const NodeID &retry_at_raylet_id, bool cancel = false) {
|
||||
rpc::RequestWorkerLeaseReply reply;
|
||||
if (cancel) {
|
||||
reply.set_canceled(true);
|
||||
@@ -341,7 +341,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
@@ -354,7 +354,7 @@ TEST(DirectTaskTransportTest, TestSubmitOneTask) {
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 0);
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
ASSERT_EQ(task_finisher->num_tasks_complete, 0);
|
||||
ASSERT_EQ(task_finisher->num_tasks_failed, 0);
|
||||
@@ -382,7 +382,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -390,7 +390,7 @@ TEST(DirectTaskTransportTest, TestHandleTaskFailure) {
|
||||
TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor);
|
||||
|
||||
ASSERT_TRUE(submitter.SubmitTask(task).ok());
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
|
||||
// Simulate a system failure, i.e., worker died unexpectedly.
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask(Status::IOError("oops")));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
@@ -416,7 +416,7 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -431,17 +431,17 @@ TEST(DirectTaskTransportTest, TestConcurrentWorkerLeases) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed; worker 2 is requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
// Task 2 is pushed; worker 3 is requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 2);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 3);
|
||||
|
||||
// Task 3 is pushed; no more workers requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 3);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 3);
|
||||
|
||||
@@ -471,7 +471,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -486,7 +486,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
ASSERT_EQ(raylet_client->num_leases_canceled, 0);
|
||||
@@ -508,7 +508,7 @@ TEST(DirectTaskTransportTest, TestReuseWorkerLease) {
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 1);
|
||||
|
||||
// The second lease request is returned immediately.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 2);
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
|
||||
@@ -532,7 +532,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -547,7 +547,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
// Task 1 finishes, Task 2 is scheduled on the same worker.
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
// Task 2 finishes, Task 3 is scheduled on the same worker.
|
||||
@@ -569,7 +569,7 @@ TEST(DirectTaskTransportTest, TestRetryLeaseCancellation) {
|
||||
ASSERT_EQ(raylet_client->num_leases_canceled, i);
|
||||
ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease());
|
||||
ASSERT_EQ(raylet_client->num_leases_canceled, i);
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
// The canceled lease is not returned.
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 1);
|
||||
@@ -592,7 +592,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -605,7 +605,7 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
|
||||
ASSERT_TRUE(submitter.SubmitTask(task2).ok());
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
// Task 1 finishes, Task 2 is scheduled on the same worker.
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
@@ -624,11 +624,11 @@ TEST(DirectTaskTransportTest, TestConcurrentCancellationAndSubmission) {
|
||||
// Task 2's lease request is canceled, a new worker is requested for task 3.
|
||||
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, ClientID::Nil(), /*cancel=*/true));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("", 0, NodeID::Nil(), /*cancel=*/true));
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 3);
|
||||
|
||||
// Task 3 finishes, all workers returned.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 2);
|
||||
ASSERT_FALSE(raylet_client->ReplyCancelWorkerLease());
|
||||
@@ -649,7 +649,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -662,7 +662,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
@@ -673,7 +673,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReusedOnError) {
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 1);
|
||||
|
||||
// Task 2 runs successfully on the second worker.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 1);
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 1);
|
||||
@@ -697,7 +697,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -708,7 +708,7 @@ TEST(DirectTaskTransportTest, TestWorkerNotReturnedOnExit) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
|
||||
// Task 1 finishes with exit status; the worker is not returned.
|
||||
@@ -745,7 +745,7 @@ TEST(DirectTaskTransportTest, TestSpillback) {
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool,
|
||||
lease_client_factory, store, task_finisher,
|
||||
ClientID::Nil(), kLongTimeout, actor_creator);
|
||||
NodeID::Nil(), kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
ray::FunctionDescriptorBuilder::BuildPython("", "", "", "");
|
||||
@@ -758,14 +758,14 @@ TEST(DirectTaskTransportTest, TestSpillback) {
|
||||
ASSERT_EQ(remote_lease_clients.size(), 0);
|
||||
|
||||
// Spillback to a remote node.
|
||||
auto remote_raylet_id = ClientID::FromRandom();
|
||||
auto remote_raylet_id = NodeID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id));
|
||||
ASSERT_EQ(remote_lease_clients.count(7777), 1);
|
||||
// There should be no more callbacks on the local client.
|
||||
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil()));
|
||||
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil()));
|
||||
// Trigger retry at the remote node.
|
||||
ASSERT_TRUE(
|
||||
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil()));
|
||||
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil()));
|
||||
|
||||
// The worker is returned to the remote node, not the local one.
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
@@ -804,7 +804,7 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
|
||||
return client;
|
||||
};
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto local_raylet_id = ClientID::FromRandom();
|
||||
auto local_raylet_id = NodeID::FromRandom();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool,
|
||||
lease_client_factory, store, task_finisher,
|
||||
@@ -821,10 +821,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
|
||||
ASSERT_EQ(remote_lease_clients.size(), 0);
|
||||
|
||||
// Spillback to a remote node.
|
||||
auto remote_raylet_id = ClientID::FromRandom();
|
||||
auto remote_raylet_id = NodeID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 7777, remote_raylet_id));
|
||||
ASSERT_EQ(remote_lease_clients.count(7777), 1);
|
||||
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, ClientID::Nil()));
|
||||
ASSERT_FALSE(raylet_client->GrantWorkerLease("remote", 1234, NodeID::Nil()));
|
||||
// Trigger a spillback back to the local node.
|
||||
ASSERT_TRUE(
|
||||
remote_lease_clients[7777]->GrantWorkerLease("local", 1234, local_raylet_id));
|
||||
@@ -832,10 +832,10 @@ TEST(DirectTaskTransportTest, TestSpillbackRoundTrip) {
|
||||
ASSERT_EQ(remote_lease_clients.size(), 1);
|
||||
// There should be no more callbacks on the remote node.
|
||||
ASSERT_FALSE(
|
||||
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, ClientID::Nil()));
|
||||
remote_lease_clients[7777]->GrantWorkerLease("remote", 1234, NodeID::Nil()));
|
||||
|
||||
// The worker is returned to the local node.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("local", 1234, NodeID::Nil()));
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 1);
|
||||
ASSERT_EQ(remote_lease_clients[7777]->num_workers_returned, 0);
|
||||
@@ -868,7 +868,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
|
||||
ASSERT_TRUE(submitter.SubmitTask(same1).ok());
|
||||
@@ -877,7 +877,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
// same1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 1);
|
||||
// Another worker is requested because same2 is pending.
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 3);
|
||||
@@ -893,7 +893,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
|
||||
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
|
||||
|
||||
// different is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 2);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 3);
|
||||
|
||||
@@ -910,7 +910,7 @@ void TestSchedulingKey(const std::shared_ptr<CoreWorkerMemoryStore> store,
|
||||
ASSERT_EQ(raylet_client->num_leases_canceled, 1);
|
||||
|
||||
// Trigger reply to RequestWorkerLease to remove the canceled pending lease request
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil(), true));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil(), true));
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 2);
|
||||
|
||||
// Check that there are no entries left in the scheduling_key_entries_ hashmap. These
|
||||
@@ -994,7 +994,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
/*lease_timeout_ms=*/5, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -1009,7 +1009,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1 is pushed.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
// Task 1 finishes with failure; the worker is returned due to the error even though
|
||||
@@ -1020,7 +1020,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
|
||||
|
||||
// Task 2 runs successfully on the second worker; the worker is returned due to the
|
||||
// timeout.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
std::this_thread::sleep_for(
|
||||
std::chrono::milliseconds(10)); // Sleep for 10ms, causing the lease to time out.
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
@@ -1029,7 +1029,7 @@ TEST(DirectTaskTransportTest, TestWorkerLeaseTimeout) {
|
||||
|
||||
// Task 3 runs successfully on the third worker; the worker is returned even though it
|
||||
// hasn't timed out.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1002, NodeID::Nil()));
|
||||
ASSERT_TRUE(worker_client->ReplyPushTask());
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 2);
|
||||
@@ -1053,7 +1053,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -1061,7 +1061,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
|
||||
TaskSpecification task = BuildTaskSpec(empty_resources, empty_descriptor);
|
||||
|
||||
ASSERT_TRUE(submitter.SubmitTask(task).ok());
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
|
||||
|
||||
// Try force kill, exiting the worker
|
||||
ASSERT_TRUE(submitter.CancelTask(task, true).ok());
|
||||
@@ -1077,7 +1077,7 @@ TEST(DirectTaskTransportTest, TestKillExecutingTask) {
|
||||
task.GetMutableMessage().set_task_id(
|
||||
TaskID::ForNormalTask(JobID::Nil(), TaskID::Nil(), 1).Binary());
|
||||
ASSERT_TRUE(submitter.SubmitTask(task).ok());
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1234, NodeID::Nil()));
|
||||
|
||||
// Try non-force kill, worker returns normally
|
||||
ASSERT_TRUE(submitter.CancelTask(task, false).ok());
|
||||
@@ -1105,7 +1105,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -1124,7 +1124,7 @@ TEST(DirectTaskTransportTest, TestKillPendingTask) {
|
||||
ASSERT_TRUE(raylet_client->ReplyCancelWorkerLease());
|
||||
|
||||
// Trigger reply to RequestWorkerLease to remove the canceled pending lease request
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil(), true));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil(), true));
|
||||
|
||||
// Check that there are no entries left in the scheduling_key_entries_ hashmap. These
|
||||
// would otherwise cause a memory leak.
|
||||
@@ -1141,7 +1141,7 @@ TEST(DirectTaskTransportTest, TestKillResolvingTask) {
|
||||
auto task_finisher = std::make_shared<MockTaskFinisher>();
|
||||
auto actor_creator = std::make_shared<MockActorCreator>();
|
||||
CoreWorkerDirectTaskSubmitter submitter(address, raylet_client, client_pool, nullptr,
|
||||
store, task_finisher, ClientID::Nil(),
|
||||
store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator);
|
||||
std::unordered_map<std::string, double> empty_resources;
|
||||
ray::FunctionDescriptor empty_descriptor =
|
||||
@@ -1181,7 +1181,7 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) {
|
||||
// parameter to the CoreWorkerDirectTaskSubmitter.
|
||||
uint32_t max_tasks_in_flight_per_worker = 10;
|
||||
CoreWorkerDirectTaskSubmitter submitter(
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
|
||||
|
||||
// Prepare 20 tasks and save them in a vector.
|
||||
@@ -1201,12 +1201,12 @@ TEST(DirectTaskTransportTest, TestPipeliningConcurrentWorkerLeases) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// First 10 tasks are pushed; worker 2 is requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 10);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
// Last 10 tasks are pushed; no more workers are requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 20);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
|
||||
@@ -1255,7 +1255,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
|
||||
// parameter to the CoreWorkerDirectTaskSubmitter.
|
||||
uint32_t max_tasks_in_flight_per_worker = 10;
|
||||
CoreWorkerDirectTaskSubmitter submitter(
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
|
||||
|
||||
// prepare 30 tasks and save them in a vector
|
||||
@@ -1275,7 +1275,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
|
||||
// Task 1-10 are pushed, and a new worker is requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 10);
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
// The lease is not cancelled, as there is more work to do
|
||||
@@ -1305,7 +1305,7 @@ TEST(DirectTaskTransportTest, TestPipeliningReuseWorkerLease) {
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 1);
|
||||
|
||||
// The second lease request is returned immediately.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 2);
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
|
||||
@@ -1334,7 +1334,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
|
||||
// parameter to the CoreWorkerDirectTaskSubmitter.
|
||||
uint32_t max_tasks_in_flight_per_worker = 10;
|
||||
CoreWorkerDirectTaskSubmitter submitter(
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, ClientID::Nil(),
|
||||
address, raylet_client, client_pool, nullptr, store, task_finisher, NodeID::Nil(),
|
||||
kLongTimeout, actor_creator, max_tasks_in_flight_per_worker);
|
||||
|
||||
// prepare 30 tasks and save them in a vector
|
||||
@@ -1361,7 +1361,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 0);
|
||||
|
||||
// Grant a worker lease, and check that still only 1 worker was requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1000, NodeID::Nil()));
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 1);
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 0);
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
|
||||
@@ -1399,7 +1399,7 @@ TEST(DirectTaskTransportTest, TestPipeliningNumberOfWorkersRequested) {
|
||||
ASSERT_EQ(worker_client->callbacks.size(), 10);
|
||||
|
||||
// Grant a worker lease, and check that still only 2 workers were requested.
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, ClientID::Nil()));
|
||||
ASSERT_TRUE(raylet_client->GrantWorkerLease("localhost", 1001, NodeID::Nil()));
|
||||
ASSERT_EQ(raylet_client->num_workers_requested, 2);
|
||||
ASSERT_EQ(raylet_client->num_workers_returned, 0);
|
||||
ASSERT_EQ(raylet_client->num_workers_disconnected, 0);
|
||||
|
||||
@@ -104,7 +104,7 @@ class MockObjectDirectory {
|
||||
class ObjectRecoveryManagerTest : public ::testing::Test {
|
||||
public:
|
||||
ObjectRecoveryManagerTest()
|
||||
: local_raylet_id_(ClientID::FromRandom()),
|
||||
: local_raylet_id_(NodeID::FromRandom()),
|
||||
object_directory_(std::make_shared<MockObjectDirectory>()),
|
||||
memory_store_(std::make_shared<CoreWorkerMemoryStore>()),
|
||||
raylet_client_(std::make_shared<MockRayletClient>()),
|
||||
@@ -137,7 +137,7 @@ class ObjectRecoveryManagerTest : public ::testing::Test {
|
||||
},
|
||||
/*lineage_reconstruction_enabled=*/true) {}
|
||||
|
||||
ClientID local_raylet_id_;
|
||||
NodeID local_raylet_id_;
|
||||
std::unordered_map<ObjectID, bool> failed_reconstructions_;
|
||||
|
||||
std::shared_ptr<MockObjectDirectory> object_directory_;
|
||||
|
||||
@@ -46,7 +46,7 @@ class TaskManagerTest : public ::testing::Test {
|
||||
num_retries_++;
|
||||
return Status::OK();
|
||||
},
|
||||
[this](const ClientID &node_id) { return all_nodes_alive_; },
|
||||
[this](const NodeID &node_id) { return all_nodes_alive_; },
|
||||
[this](const ObjectID &object_id) {
|
||||
objects_to_recover_.push_back(object_id);
|
||||
}) {}
|
||||
|
||||
@@ -233,9 +233,9 @@ CoreWorkerDirectTaskSubmitter::GetOrConnectLeaseClient(
|
||||
const rpc::Address *raylet_address) {
|
||||
std::shared_ptr<WorkerLeaseInterface> lease_client;
|
||||
if (raylet_address &&
|
||||
ClientID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) {
|
||||
NodeID::FromBinary(raylet_address->raylet_id()) != local_raylet_id_) {
|
||||
// A remote raylet was specified. Connect to the raylet if needed.
|
||||
ClientID raylet_id = ClientID::FromBinary(raylet_address->raylet_id());
|
||||
NodeID raylet_id = NodeID::FromBinary(raylet_address->raylet_id());
|
||||
auto it = remote_lease_clients_.find(raylet_id);
|
||||
if (it == remote_lease_clients_.end()) {
|
||||
RAY_LOG(DEBUG) << "Connecting to raylet " << raylet_id;
|
||||
|
||||
@@ -55,7 +55,7 @@ class CoreWorkerDirectTaskSubmitter {
|
||||
std::shared_ptr<rpc::CoreWorkerClientPool> core_worker_client_pool,
|
||||
LeaseClientFactoryFn lease_client_factory,
|
||||
std::shared_ptr<CoreWorkerMemoryStore> store,
|
||||
std::shared_ptr<TaskFinisherInterface> task_finisher, ClientID local_raylet_id,
|
||||
std::shared_ptr<TaskFinisherInterface> task_finisher, NodeID local_raylet_id,
|
||||
int64_t lease_timeout_ms, std::shared_ptr<ActorCreatorInterface> actor_creator,
|
||||
uint32_t max_tasks_in_flight_per_worker =
|
||||
RayConfig::instance().max_tasks_in_flight_per_worker(),
|
||||
@@ -154,8 +154,8 @@ class CoreWorkerDirectTaskSubmitter {
|
||||
std::shared_ptr<WorkerLeaseInterface> local_lease_client_;
|
||||
|
||||
/// Cache of gRPC clients to remote raylets.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<WorkerLeaseInterface>>
|
||||
remote_lease_clients_ GUARDED_BY(mu_);
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<WorkerLeaseInterface>> remote_lease_clients_
|
||||
GUARDED_BY(mu_);
|
||||
|
||||
/// Factory for producing new clients to request leases from remote nodes.
|
||||
LeaseClientFactoryFn lease_client_factory_;
|
||||
@@ -172,7 +172,7 @@ class CoreWorkerDirectTaskSubmitter {
|
||||
|
||||
/// The local raylet ID. Used to make sure that we use the local lease client
|
||||
/// if a remote raylet tells us to spill the task back to the local raylet.
|
||||
const ClientID local_raylet_id_;
|
||||
const NodeID local_raylet_id_;
|
||||
|
||||
/// Interface for actor creation.
|
||||
std::shared_ptr<ActorCreatorInterface> actor_creator_;
|
||||
|
||||
+13
-14
@@ -370,7 +370,7 @@ class ObjectInfoAccessor {
|
||||
/// \param node_id The location that will be added to GCS.
|
||||
/// \param callback Callback that will be called after object has been added to GCS.
|
||||
/// \return Status
|
||||
virtual Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
virtual Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) = 0;
|
||||
|
||||
/// Remove location of object from GCS asynchronously.
|
||||
@@ -379,7 +379,7 @@ class ObjectInfoAccessor {
|
||||
/// \param node_id The location that will be removed from GCS.
|
||||
/// \param callback Callback that will be called after the delete finished.
|
||||
/// \return Status
|
||||
virtual Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
virtual Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) = 0;
|
||||
|
||||
/// Subscribe to any update of an object's location.
|
||||
@@ -434,8 +434,8 @@ class NodeInfoAccessor {
|
||||
|
||||
/// Get id of local node which was registered by 'RegisterSelf'.
|
||||
///
|
||||
/// \return ClientID
|
||||
virtual const ClientID &GetSelfId() const = 0;
|
||||
/// \return NodeID
|
||||
virtual const NodeID &GetSelfId() const = 0;
|
||||
|
||||
/// Get information of local node which was registered by 'RegisterSelf'.
|
||||
///
|
||||
@@ -455,7 +455,7 @@ class NodeInfoAccessor {
|
||||
/// \param node_id The ID of node that to be unregistered.
|
||||
/// \param callback Callback that will be called when unregistration is complete.
|
||||
/// \return Status
|
||||
virtual Status AsyncUnregister(const ClientID &node_id,
|
||||
virtual Status AsyncUnregister(const NodeID &node_id,
|
||||
const StatusCallback &callback) = 0;
|
||||
|
||||
/// Get information of all nodes from GCS asynchronously.
|
||||
@@ -472,7 +472,7 @@ class NodeInfoAccessor {
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
virtual Status AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe,
|
||||
const SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) = 0;
|
||||
|
||||
/// Get node information from local cache.
|
||||
@@ -483,7 +483,7 @@ class NodeInfoAccessor {
|
||||
/// \param node_id The ID of node to look up in local cache.
|
||||
/// \return The item returned by GCS. If the item to read doesn't exist,
|
||||
/// this optional object is empty.
|
||||
virtual boost::optional<rpc::GcsNodeInfo> Get(const ClientID &node_id) const = 0;
|
||||
virtual boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id) const = 0;
|
||||
|
||||
/// Get information of all nodes from local cache.
|
||||
/// Non-thread safe.
|
||||
@@ -491,7 +491,7 @@ class NodeInfoAccessor {
|
||||
/// is called before.
|
||||
///
|
||||
/// \return All nodes in cache.
|
||||
virtual const std::unordered_map<ClientID, rpc::GcsNodeInfo> &GetAll() const = 0;
|
||||
virtual const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const = 0;
|
||||
|
||||
/// Search the local cache to find out if the given node is removed.
|
||||
/// Non-thread safe.
|
||||
@@ -500,7 +500,7 @@ class NodeInfoAccessor {
|
||||
///
|
||||
/// \param node_id The id of the node to check.
|
||||
/// \return Whether the node is removed.
|
||||
virtual bool IsRemoved(const ClientID &node_id) const = 0;
|
||||
virtual bool IsRemoved(const NodeID &node_id) const = 0;
|
||||
|
||||
// TODO(micafan) Define ResourceMap in GCS proto.
|
||||
typedef std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>>
|
||||
@@ -511,7 +511,7 @@ class NodeInfoAccessor {
|
||||
/// \param node_id The ID of node to lookup dynamic resources.
|
||||
/// \param callback Callback that will be called after lookup finishes.
|
||||
/// \return Status
|
||||
virtual Status AsyncGetResources(const ClientID &node_id,
|
||||
virtual Status AsyncGetResources(const NodeID &node_id,
|
||||
const OptionalItemCallback<ResourceMap> &callback) = 0;
|
||||
|
||||
/// Update resources of node in GCS asynchronously.
|
||||
@@ -519,8 +519,7 @@ class NodeInfoAccessor {
|
||||
/// \param node_id The ID of node to update dynamic resources.
|
||||
/// \param resources The dynamic resources of node to be updated.
|
||||
/// \param callback Callback that will be called after update finishes.
|
||||
virtual Status AsyncUpdateResources(const ClientID &node_id,
|
||||
const ResourceMap &resources,
|
||||
virtual Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
|
||||
const StatusCallback &callback) = 0;
|
||||
|
||||
/// Delete resources of a node from GCS asynchronously.
|
||||
@@ -528,7 +527,7 @@ class NodeInfoAccessor {
|
||||
/// \param node_id The ID of node to delete resources from GCS.
|
||||
/// \param resource_names The names of resource to be deleted.
|
||||
/// \param callback Callback that will be called after delete finishes.
|
||||
virtual Status AsyncDeleteResources(const ClientID &node_id,
|
||||
virtual Status AsyncDeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) = 0;
|
||||
|
||||
@@ -560,7 +559,7 @@ class NodeInfoAccessor {
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
virtual Status AsyncSubscribeHeartbeat(
|
||||
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
|
||||
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
|
||||
const StatusCallback &done) = 0;
|
||||
|
||||
/// Report state of all nodes to GCS asynchronously.
|
||||
|
||||
@@ -127,7 +127,7 @@ std::unique_ptr<std::string> GlobalStateAccessor::GetObjectInfo(
|
||||
return object_info;
|
||||
}
|
||||
|
||||
std::string GlobalStateAccessor::GetNodeResourceInfo(const ClientID &node_id) {
|
||||
std::string GlobalStateAccessor::GetNodeResourceInfo(const NodeID &node_id) {
|
||||
rpc::ResourceMap node_resource_map;
|
||||
std::promise<void> promise;
|
||||
auto on_done =
|
||||
|
||||
@@ -84,7 +84,7 @@ class GlobalStateAccessor {
|
||||
/// \return node resource map info. To support multi-language, we serialize each
|
||||
/// ResourceTableData and return the serialized string. Where used, it needs to be
|
||||
/// deserialized with protobuf function.
|
||||
std::string GetNodeResourceInfo(const ClientID &node_id);
|
||||
std::string GetNodeResourceInfo(const NodeID &node_id);
|
||||
|
||||
/// Get internal config from GCS Service.
|
||||
///
|
||||
|
||||
@@ -457,7 +457,7 @@ ServiceBasedNodeInfoAccessor::ServiceBasedNodeInfoAccessor(
|
||||
: client_impl_(client_impl) {}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info) {
|
||||
auto node_id = ClientID::FromBinary(local_node_info.node_id());
|
||||
auto node_id = NodeID::FromBinary(local_node_info.node_id());
|
||||
RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id
|
||||
<< ", address is = " << local_node_info.node_manager_address();
|
||||
RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected.";
|
||||
@@ -472,7 +472,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_
|
||||
const Status &status, const rpc::RegisterNodeReply &reply) {
|
||||
if (status.ok()) {
|
||||
local_node_info_.CopyFrom(local_node_info);
|
||||
local_node_id_ = ClientID::FromBinary(local_node_info.node_id());
|
||||
local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
|
||||
}
|
||||
RAY_LOG(DEBUG) << "Finished registering node info, status = " << status
|
||||
<< ", node id = " << node_id;
|
||||
@@ -486,7 +486,7 @@ Status ServiceBasedNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
|
||||
RAY_CHECK(!local_node_id_.IsNil()) << "This node is disconnected.";
|
||||
ClientID node_id = ClientID::FromBinary(local_node_info_.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(local_node_info_.node_id());
|
||||
RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id;
|
||||
rpc::UnregisterNodeRequest request;
|
||||
request.set_node_id(local_node_info_.node_id());
|
||||
@@ -495,7 +495,7 @@ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
|
||||
[this, node_id](const Status &status, const rpc::UnregisterNodeReply &reply) {
|
||||
if (status.ok()) {
|
||||
local_node_info_.set_state(GcsNodeInfo::DEAD);
|
||||
local_node_id_ = ClientID::Nil();
|
||||
local_node_id_ = NodeID::Nil();
|
||||
}
|
||||
RAY_LOG(INFO) << "Finished unregistering node info, status = " << status
|
||||
<< ", node id = " << node_id;
|
||||
@@ -503,7 +503,7 @@ Status ServiceBasedNodeInfoAccessor::UnregisterSelf() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
const ClientID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; }
|
||||
const NodeID &ServiceBasedNodeInfoAccessor::GetSelfId() const { return local_node_id_; }
|
||||
|
||||
const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const {
|
||||
return local_node_info_;
|
||||
@@ -511,7 +511,7 @@ const GcsNodeInfo &ServiceBasedNodeInfoAccessor::GetSelfInfo() const {
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) {
|
||||
ClientID node_id = ClientID::FromBinary(node_info.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info.node_id());
|
||||
RAY_LOG(DEBUG) << "Registering node info, node id = " << node_id;
|
||||
rpc::RegisterNodeRequest request;
|
||||
request.mutable_node_info()->CopyFrom(node_info);
|
||||
@@ -527,7 +527,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncRegister(const rpc::GcsNodeInfo &node_
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
|
||||
const StatusCallback &callback) {
|
||||
RAY_LOG(DEBUG) << "Unregistering node info, node id = " << node_id;
|
||||
rpc::UnregisterNodeRequest request;
|
||||
@@ -563,8 +563,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetAll(
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
RAY_CHECK(node_change_callback_ == nullptr);
|
||||
node_change_callback_ = subscribe;
|
||||
@@ -597,7 +596,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeToNodeChange(
|
||||
}
|
||||
|
||||
boost::optional<GcsNodeInfo> ServiceBasedNodeInfoAccessor::Get(
|
||||
const ClientID &node_id) const {
|
||||
const NodeID &node_id) const {
|
||||
RAY_CHECK(!node_id.IsNil());
|
||||
auto entry = node_cache_.find(node_id);
|
||||
if (entry != node_cache_.end()) {
|
||||
@@ -606,17 +605,17 @@ boost::optional<GcsNodeInfo> ServiceBasedNodeInfoAccessor::Get(
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &ServiceBasedNodeInfoAccessor::GetAll()
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &ServiceBasedNodeInfoAccessor::GetAll()
|
||||
const {
|
||||
return node_cache_;
|
||||
}
|
||||
|
||||
bool ServiceBasedNodeInfoAccessor::IsRemoved(const ClientID &node_id) const {
|
||||
bool ServiceBasedNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
|
||||
return removed_nodes_.count(node_id) == 1;
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncGetResources(
|
||||
const ClientID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
|
||||
const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
|
||||
RAY_LOG(DEBUG) << "Getting node resources, node id = " << node_id;
|
||||
rpc::GetResourcesRequest request;
|
||||
request.set_node_id(node_id.Binary());
|
||||
@@ -636,8 +635,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncGetResources(
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources(
|
||||
const ClientID &node_id, const ResourceMap &resources,
|
||||
const StatusCallback &callback) {
|
||||
const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) {
|
||||
RAY_LOG(DEBUG) << "Updating node resources, node id = " << node_id;
|
||||
rpc::UpdateResourcesRequest request;
|
||||
request.set_node_id(node_id.Binary());
|
||||
@@ -664,7 +662,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncUpdateResources(
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncDeleteResources(
|
||||
const ClientID &node_id, const std::vector<std::string> &resource_names,
|
||||
const NodeID &node_id, const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) {
|
||||
RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id;
|
||||
rpc::DeleteResourcesRequest request;
|
||||
@@ -732,7 +730,7 @@ void ServiceBasedNodeInfoAccessor::AsyncReReportHeartbeat() {
|
||||
}
|
||||
|
||||
Status ServiceBasedNodeInfoAccessor::AsyncSubscribeHeartbeat(
|
||||
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
|
||||
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
const std::string error_msg =
|
||||
"Unsupported method of AsyncSubscribeHeartbeat in ServiceBasedNodeInfoAccessor.";
|
||||
@@ -766,7 +764,7 @@ Status ServiceBasedNodeInfoAccessor::AsyncSubscribeBatchHeartbeat(
|
||||
}
|
||||
|
||||
void ServiceBasedNodeInfoAccessor::HandleNotification(const GcsNodeInfo &node_info) {
|
||||
ClientID node_id = ClientID::FromBinary(node_info.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info.node_id());
|
||||
bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
|
||||
auto entry = node_cache_.find(node_id);
|
||||
bool is_notif_new;
|
||||
@@ -974,7 +972,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) {
|
||||
Status ServiceBasedTaskInfoAccessor::AsyncAddTaskLease(
|
||||
const std::shared_ptr<rpc::TaskLeaseData> &data_ptr, const StatusCallback &callback) {
|
||||
TaskID task_id = TaskID::FromBinary(data_ptr->task_id());
|
||||
ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id());
|
||||
NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id());
|
||||
RAY_LOG(DEBUG) << "Adding task lease, task id = " << task_id
|
||||
<< ", node id = " << node_id;
|
||||
rpc::AddTaskLeaseRequest request;
|
||||
@@ -1061,7 +1059,7 @@ Status ServiceBasedTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &tas
|
||||
Status ServiceBasedTaskInfoAccessor::AttemptTaskReconstruction(
|
||||
const std::shared_ptr<rpc::TaskReconstructionData> &data_ptr,
|
||||
const StatusCallback &callback) {
|
||||
ClientID node_id = ClientID::FromBinary(data_ptr->node_manager_id());
|
||||
NodeID node_id = NodeID::FromBinary(data_ptr->node_manager_id());
|
||||
RAY_LOG(DEBUG) << "Reconstructing task, reconstructions num = "
|
||||
<< data_ptr->num_reconstructions() << ", node id = " << node_id;
|
||||
rpc::AttemptTaskReconstructionRequest request;
|
||||
@@ -1151,7 +1149,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncGetAll(
|
||||
}
|
||||
|
||||
Status ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
|
||||
const ClientID &node_id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &callback) {
|
||||
RAY_LOG(DEBUG) << "Adding object location, object id = " << object_id
|
||||
<< ", node id = " << node_id;
|
||||
@@ -1179,7 +1177,7 @@ Status ServiceBasedObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_i
|
||||
}
|
||||
|
||||
Status ServiceBasedObjectInfoAccessor::AsyncRemoveLocation(
|
||||
const ObjectID &object_id, const ClientID &node_id, const StatusCallback &callback) {
|
||||
const ObjectID &object_id, const NodeID &node_id, const StatusCallback &callback) {
|
||||
RAY_LOG(DEBUG) << "Removing object location, object id = " << object_id
|
||||
<< ", node id = " << node_id;
|
||||
rpc::RemoveObjectLocationRequest request;
|
||||
@@ -1299,7 +1297,7 @@ ServiceBasedStatsInfoAccessor::ServiceBasedStatsInfoAccessor(
|
||||
Status ServiceBasedStatsInfoAccessor::AsyncAddProfileData(
|
||||
const std::shared_ptr<rpc::ProfileTableData> &data_ptr,
|
||||
const StatusCallback &callback) {
|
||||
ClientID node_id = ClientID::FromBinary(data_ptr->component_id());
|
||||
NodeID node_id = NodeID::FromBinary(data_ptr->component_id());
|
||||
RAY_LOG(DEBUG) << "Adding profile data, component type = " << data_ptr->component_type()
|
||||
<< ", node id = " << node_id;
|
||||
rpc::AddProfileDataRequest request;
|
||||
|
||||
@@ -156,35 +156,34 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
|
||||
|
||||
Status UnregisterSelf() override;
|
||||
|
||||
const ClientID &GetSelfId() const override;
|
||||
const NodeID &GetSelfId() const override;
|
||||
|
||||
const GcsNodeInfo &GetSelfInfo() const override;
|
||||
|
||||
Status AsyncRegister(const rpc::GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncUnregister(const ClientID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
|
||||
|
||||
Status AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
boost::optional<GcsNodeInfo> Get(const ClientID &node_id) const override;
|
||||
boost::optional<GcsNodeInfo> Get(const NodeID &node_id) const override;
|
||||
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &GetAll() const override;
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
|
||||
|
||||
bool IsRemoved(const ClientID &node_id) const override;
|
||||
bool IsRemoved(const NodeID &node_id) const override;
|
||||
|
||||
Status AsyncGetResources(const ClientID &node_id,
|
||||
Status AsyncGetResources(const NodeID &node_id,
|
||||
const OptionalItemCallback<ResourceMap> &callback) override;
|
||||
|
||||
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
|
||||
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncDeleteResources(const ClientID &node_id,
|
||||
Status AsyncDeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
@@ -197,7 +196,7 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
|
||||
void AsyncReReportHeartbeat() override;
|
||||
|
||||
Status AsyncSubscribeHeartbeat(
|
||||
const SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
|
||||
const SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncReportBatchHeartbeat(
|
||||
@@ -240,20 +239,20 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
|
||||
ServiceBasedGcsClient *client_impl_;
|
||||
|
||||
using NodeChangeCallback =
|
||||
std::function<void(const ClientID &id, const GcsNodeInfo &node_info)>;
|
||||
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
|
||||
|
||||
GcsNodeInfo local_node_info_;
|
||||
ClientID local_node_id_;
|
||||
NodeID local_node_id_;
|
||||
|
||||
Sequencer<ClientID> sequencer_;
|
||||
Sequencer<NodeID> sequencer_;
|
||||
|
||||
/// The callback to call when a new node is added or a node is removed.
|
||||
NodeChangeCallback node_change_callback_{nullptr};
|
||||
|
||||
/// A cache for information about all nodes.
|
||||
std::unordered_map<ClientID, GcsNodeInfo> node_cache_;
|
||||
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
|
||||
/// The set of removed nodes.
|
||||
std::unordered_set<ClientID> removed_nodes_;
|
||||
std::unordered_set<NodeID> removed_nodes_;
|
||||
};
|
||||
|
||||
/// \class ServiceBasedTaskInfoAccessor
|
||||
@@ -329,10 +328,10 @@ class ServiceBasedObjectInfoAccessor : public ObjectInfoAccessor {
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::ObjectLocationInfo> &callback) override;
|
||||
|
||||
Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeToLocations(
|
||||
|
||||
@@ -139,7 +139,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) {
|
||||
for (int index = 0; index < node_count; ++index) {
|
||||
auto node_table_data =
|
||||
Mocker::GenNodeInfo(index, std::string("127.0.0.") + std::to_string(index));
|
||||
auto node_id = ClientID::FromBinary(node_table_data->node_id());
|
||||
auto node_id = NodeID::FromBinary(node_table_data->node_id());
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Nodes().AsyncRegister(
|
||||
*node_table_data, [&promise](Status status) { promise.set_value(status.ok()); }));
|
||||
@@ -158,7 +158,7 @@ TEST_F(GlobalStateAccessorTest, TestNodeResourceTable) {
|
||||
rpc::GcsNodeInfo node_data;
|
||||
node_data.ParseFromString(node_table[index]);
|
||||
auto resource_map_str =
|
||||
global_state_->GetNodeResourceInfo(ClientID::FromBinary(node_data.node_id()));
|
||||
global_state_->GetNodeResourceInfo(NodeID::FromBinary(node_data.node_id()));
|
||||
rpc::ResourceMap resource_map;
|
||||
resource_map.ParseFromString(resource_map_str);
|
||||
ASSERT_EQ(
|
||||
@@ -195,7 +195,7 @@ TEST_F(GlobalStateAccessorTest, TestProfileTable) {
|
||||
int profile_count = RayConfig::instance().maximum_profile_table_rows_count() + 1;
|
||||
ASSERT_EQ(global_state_->GetAllProfileInfo().size(), 0);
|
||||
for (int index = 0; index < profile_count; ++index) {
|
||||
auto client_id = ClientID::FromRandom();
|
||||
auto client_id = NodeID::FromRandom();
|
||||
auto profile_table_data = Mocker::GenProfileTableData(client_id);
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Stats().AsyncAddProfileData(
|
||||
@@ -215,7 +215,7 @@ TEST_F(GlobalStateAccessorTest, TestObjectTable) {
|
||||
for (int index = 0; index < object_count; ++index) {
|
||||
ObjectID object_id = ObjectID::FromRandom();
|
||||
object_ids.emplace_back(object_id);
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation(
|
||||
object_id, node_id,
|
||||
|
||||
@@ -221,7 +221,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
bool SubscribeToNodeChange(
|
||||
const gcs::SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe) {
|
||||
const gcs::SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe) {
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Nodes().AsyncSubscribeToNodeChange(
|
||||
subscribe, [&promise](Status status) { promise.set_value(status.ok()); }));
|
||||
@@ -258,7 +258,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
return nodes;
|
||||
}
|
||||
|
||||
bool UnregisterNode(const ClientID &node_id) {
|
||||
bool UnregisterNode(const NodeID &node_id) {
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Nodes().AsyncUnregister(
|
||||
node_id, [&promise](Status status) { promise.set_value(status.ok()); }));
|
||||
@@ -272,7 +272,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
return WaitReady(promise.get_future(), timeout_ms_);
|
||||
}
|
||||
|
||||
gcs::NodeInfoAccessor::ResourceMap GetResources(const ClientID &node_id) {
|
||||
gcs::NodeInfoAccessor::ResourceMap GetResources(const NodeID &node_id) {
|
||||
gcs::NodeInfoAccessor::ResourceMap resource_map;
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Nodes().AsyncGetResources(
|
||||
@@ -288,7 +288,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
return resource_map;
|
||||
}
|
||||
|
||||
bool UpdateResources(const ClientID &node_id, const std::string &key) {
|
||||
bool UpdateResources(const NodeID &node_id, const std::string &key) {
|
||||
std::promise<bool> promise;
|
||||
gcs::NodeInfoAccessor::ResourceMap resource_map;
|
||||
auto resource = std::make_shared<rpc::ResourceTableData>();
|
||||
@@ -300,7 +300,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
return WaitReady(promise.get_future(), timeout_ms_);
|
||||
}
|
||||
|
||||
bool DeleteResources(const ClientID &node_id,
|
||||
bool DeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names) {
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Nodes().AsyncDeleteResources(
|
||||
@@ -415,7 +415,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
RAY_CHECK_OK(gcs_client_->Objects().AsyncUnsubscribeToLocations(object_id));
|
||||
}
|
||||
|
||||
bool AddLocation(const ObjectID &object_id, const ClientID &node_id) {
|
||||
bool AddLocation(const ObjectID &object_id, const NodeID &node_id) {
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Objects().AsyncAddLocation(
|
||||
object_id, node_id,
|
||||
@@ -423,7 +423,7 @@ class ServiceBasedGcsClientTest : public ::testing::Test {
|
||||
return WaitReady(promise.get_future(), timeout_ms_);
|
||||
}
|
||||
|
||||
bool RemoveLocation(const ObjectID &object_id, const ClientID &node_id) {
|
||||
bool RemoveLocation(const ObjectID &object_id, const NodeID &node_id) {
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(gcs_client_->Objects().AsyncRemoveLocation(
|
||||
object_id, node_id,
|
||||
@@ -601,12 +601,12 @@ TEST_F(ServiceBasedGcsClientTest, TestActorSubscribeAll) {
|
||||
TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) {
|
||||
// Create gcs node info.
|
||||
auto gcs_node1_info = Mocker::GenNodeInfo();
|
||||
ClientID node1_id = ClientID::FromBinary(gcs_node1_info->node_id());
|
||||
NodeID node1_id = NodeID::FromBinary(gcs_node1_info->node_id());
|
||||
|
||||
// Subscribe to node addition and removal events from GCS.
|
||||
std::atomic<int> register_count(0);
|
||||
std::atomic<int> unregister_count(0);
|
||||
auto on_subscribe = [&register_count, &unregister_count](const ClientID &node_id,
|
||||
auto on_subscribe = [&register_count, &unregister_count](const NodeID &node_id,
|
||||
const rpc::GcsNodeInfo &data) {
|
||||
if (data.state() == rpc::GcsNodeInfo::ALIVE) {
|
||||
++register_count;
|
||||
@@ -625,7 +625,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeInfo) {
|
||||
|
||||
// Register a node to GCS.
|
||||
auto gcs_node2_info = Mocker::GenNodeInfo();
|
||||
ClientID node2_id = ClientID::FromBinary(gcs_node2_info->node_id());
|
||||
NodeID node2_id = NodeID::FromBinary(gcs_node2_info->node_id());
|
||||
ASSERT_TRUE(RegisterNode(*gcs_node2_info));
|
||||
WaitForExpectedCount(register_count, 2);
|
||||
|
||||
@@ -671,7 +671,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeResources) {
|
||||
RAY_CHECK(RegisterNode(*node_info));
|
||||
|
||||
// Update resources of node in GCS.
|
||||
ClientID node_id = ClientID::FromBinary(node_info->node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info->node_id());
|
||||
std::string key = "CPU";
|
||||
ASSERT_TRUE(UpdateResources(node_id, key));
|
||||
WaitForExpectedCount(add_count, 1);
|
||||
@@ -697,7 +697,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeHeartbeat) {
|
||||
RAY_CHECK(RegisterNode(*node_info));
|
||||
|
||||
// Report heartbeat of a node to GCS.
|
||||
ClientID node_id = ClientID::FromBinary(node_info->node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info->node_id());
|
||||
auto heartbeat = std::make_shared<rpc::HeartbeatTableData>();
|
||||
heartbeat->set_client_id(node_id.Binary());
|
||||
// Set this flag because GCS won't publish unchanged heartbeat.
|
||||
@@ -747,7 +747,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) {
|
||||
ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe));
|
||||
|
||||
// Add a task lease to GCS.
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
|
||||
ASSERT_TRUE(AddTaskLease(task_lease));
|
||||
WaitForExpectedCount(task_lease_count, 2);
|
||||
@@ -771,7 +771,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskInfo) {
|
||||
|
||||
TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) {
|
||||
ObjectID object_id = ObjectID::FromRandom();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
|
||||
// Subscribe to any update of an object's location.
|
||||
std::atomic<int> object_add_count(0);
|
||||
@@ -816,7 +816,7 @@ TEST_F(ServiceBasedGcsClientTest, TestObjectInfo) {
|
||||
|
||||
TEST_F(ServiceBasedGcsClientTest, TestStats) {
|
||||
// Add profile data to GCS.
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
auto profile_table_data = Mocker::GenProfileTableData(node_id);
|
||||
ASSERT_TRUE(AddProfileData(profile_table_data));
|
||||
}
|
||||
@@ -938,7 +938,7 @@ TEST_F(ServiceBasedGcsClientTest, TestActorTableResubscribe) {
|
||||
TEST_F(ServiceBasedGcsClientTest, TestObjectTableResubscribe) {
|
||||
ObjectID object1_id = ObjectID::FromRandom();
|
||||
ObjectID object2_id = ObjectID::FromRandom();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
|
||||
// Subscribe to any update of an object's location.
|
||||
std::atomic<int> object1_change_count(0);
|
||||
@@ -985,7 +985,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
|
||||
// Test that subscription of the node table can still work when GCS server restarts.
|
||||
// Subscribe to node addition and removal events from GCS and cache that information.
|
||||
std::atomic<int> node_change_count(0);
|
||||
auto node_subscribe = [&node_change_count](const ClientID &id,
|
||||
auto node_subscribe = [&node_change_count](const NodeID &id,
|
||||
const rpc::GcsNodeInfo &result) {
|
||||
++node_change_count;
|
||||
};
|
||||
@@ -1009,7 +1009,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
|
||||
|
||||
auto node_info = Mocker::GenNodeInfo(1);
|
||||
ASSERT_TRUE(RegisterNode(*node_info));
|
||||
ClientID node_id = ClientID::FromBinary(node_info->node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info->node_id());
|
||||
std::string key = "CPU";
|
||||
ASSERT_TRUE(UpdateResources(node_id, key));
|
||||
auto heartbeat = std::make_shared<rpc::HeartbeatTableData>();
|
||||
@@ -1023,7 +1023,7 @@ TEST_F(ServiceBasedGcsClientTest, TestNodeTableResubscribe) {
|
||||
|
||||
node_info = Mocker::GenNodeInfo(1);
|
||||
ASSERT_TRUE(RegisterNode(*node_info));
|
||||
node_id = ClientID::FromBinary(node_info->node_id());
|
||||
node_id = NodeID::FromBinary(node_info->node_id());
|
||||
ASSERT_TRUE(UpdateResources(node_id, key));
|
||||
heartbeat->set_client_id(node_info->node_id());
|
||||
ASSERT_TRUE(ReportHeartbeat(heartbeat));
|
||||
@@ -1056,7 +1056,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) {
|
||||
ASSERT_TRUE(SubscribeTaskLease(task_id, task_lease_subscribe));
|
||||
|
||||
ASSERT_TRUE(AddTask(task_table_data));
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
auto task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
|
||||
ASSERT_TRUE(AddTaskLease(task_lease));
|
||||
WaitForExpectedCount(task_count, 1);
|
||||
@@ -1065,7 +1065,7 @@ TEST_F(ServiceBasedGcsClientTest, TestTaskTableResubscribe) {
|
||||
|
||||
RestartGcsServer();
|
||||
|
||||
node_id = ClientID::FromRandom();
|
||||
node_id = NodeID::FromRandom();
|
||||
task_lease = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
|
||||
ASSERT_TRUE(AddTaskLease(task_lease));
|
||||
WaitForExpectedCount(task_lease_count, 3);
|
||||
@@ -1096,7 +1096,7 @@ TEST_F(ServiceBasedGcsClientTest, TestWorkerTableResubscribe) {
|
||||
|
||||
TEST_F(ServiceBasedGcsClientTest, TestGcsTableReload) {
|
||||
ObjectID object_id = ObjectID::FromRandom();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
|
||||
// Register node to GCS.
|
||||
auto node_info = Mocker::GenNodeInfo();
|
||||
|
||||
@@ -21,12 +21,12 @@
|
||||
namespace ray {
|
||||
namespace gcs {
|
||||
|
||||
ClientID GcsActor::GetNodeID() const {
|
||||
NodeID GcsActor::GetNodeID() const {
|
||||
const auto &raylet_id_binary = actor_table_data_.address().raylet_id();
|
||||
if (raylet_id_binary.empty()) {
|
||||
return ClientID::Nil();
|
||||
return NodeID::Nil();
|
||||
}
|
||||
return ClientID::FromBinary(raylet_id_binary);
|
||||
return NodeID::FromBinary(raylet_id_binary);
|
||||
}
|
||||
|
||||
void GcsActor::UpdateAddress(const rpc::Address &address) {
|
||||
@@ -47,8 +47,8 @@ WorkerID GcsActor::GetOwnerID() const {
|
||||
return WorkerID::FromBinary(GetOwnerAddress().worker_id());
|
||||
}
|
||||
|
||||
ClientID GcsActor::GetOwnerNodeID() const {
|
||||
return ClientID::FromBinary(GetOwnerAddress().raylet_id());
|
||||
NodeID GcsActor::GetOwnerNodeID() const {
|
||||
return NodeID::FromBinary(GetOwnerAddress().raylet_id());
|
||||
}
|
||||
|
||||
const rpc::Address &GcsActor::GetOwnerAddress() const {
|
||||
@@ -425,7 +425,7 @@ Status GcsActorManager::RegisterActor(const ray::rpc::RegisterActorRequest &requ
|
||||
RAY_CHECK(registered_actors_.emplace(actor->GetActorID(), actor).second);
|
||||
|
||||
const auto &owner_address = actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
|
||||
RAY_CHECK(unresolved_actors_[node_id][worker_id].emplace(actor->GetActorID()).second);
|
||||
|
||||
@@ -649,7 +649,7 @@ void GcsActorManager::DestroyActor(const ActorID &actor_id) {
|
||||
}
|
||||
|
||||
absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerNode(
|
||||
const ClientID &node_id) const {
|
||||
const NodeID &node_id) const {
|
||||
absl::flat_hash_set<ActorID> actor_ids;
|
||||
auto iter = unresolved_actors_.find(node_id);
|
||||
if (iter != unresolved_actors_.end()) {
|
||||
@@ -661,7 +661,7 @@ absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerNode(
|
||||
}
|
||||
|
||||
absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerWorker(
|
||||
const ClientID &node_id, const WorkerID &worker_id) const {
|
||||
const NodeID &node_id, const WorkerID &worker_id) const {
|
||||
absl::flat_hash_set<ActorID> actor_ids;
|
||||
auto iter = unresolved_actors_.find(node_id);
|
||||
if (iter != unresolved_actors_.end()) {
|
||||
@@ -673,7 +673,7 @@ absl::flat_hash_set<ActorID> GcsActorManager::GetUnresolvedActorsByOwnerWorker(
|
||||
return actor_ids;
|
||||
}
|
||||
|
||||
void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id,
|
||||
void GcsActorManager::OnWorkerDead(const ray::NodeID &node_id,
|
||||
const ray::WorkerID &worker_id,
|
||||
bool intentional_exit) {
|
||||
if (intentional_exit) {
|
||||
@@ -727,7 +727,7 @@ void GcsActorManager::OnWorkerDead(const ray::ClientID &node_id,
|
||||
ReconstructActor(actor_id, /*need_reschedule=*/!intentional_exit);
|
||||
}
|
||||
|
||||
void GcsActorManager::OnNodeDead(const ClientID &node_id) {
|
||||
void GcsActorManager::OnNodeDead(const NodeID &node_id) {
|
||||
RAY_LOG(WARNING) << "Node " << node_id << " failed, reconstructing actors.";
|
||||
const auto it = owners_.find(node_id);
|
||||
if (it != owners_.end()) {
|
||||
@@ -913,7 +913,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) {
|
||||
RAY_LOG(INFO) << "Loading initial data.";
|
||||
auto callback = [this,
|
||||
done](const std::unordered_map<ActorID, ActorTableData> &result) {
|
||||
std::unordered_map<ClientID, std::vector<WorkerID>> node_to_workers;
|
||||
std::unordered_map<NodeID, std::vector<WorkerID>> node_to_workers;
|
||||
for (auto &item : result) {
|
||||
if (item.second.state() != ray::rpc::ActorTableData::DEAD) {
|
||||
auto actor = std::make_shared<GcsActor>(item.second);
|
||||
@@ -925,7 +925,7 @@ void GcsActorManager::LoadInitialData(const EmptyCallback &done) {
|
||||
|
||||
if (item.second.state() == ray::rpc::ActorTableData::DEPENDENCIES_UNREADY) {
|
||||
const auto &owner = actor->GetOwnerAddress();
|
||||
const auto &owner_node = ClientID::FromBinary(owner.raylet_id());
|
||||
const auto &owner_node = NodeID::FromBinary(owner.raylet_id());
|
||||
const auto &owner_worker = WorkerID::FromBinary(owner.worker_id());
|
||||
RAY_CHECK(unresolved_actors_[owner_node][owner_worker]
|
||||
.emplace(actor->GetActorID())
|
||||
@@ -1026,7 +1026,7 @@ void GcsActorManager::OnJobFinished(const JobID &job_id) {
|
||||
RAY_CHECK_OK(gcs_table_storage_->ActorTable().GetByJobId(job_id, on_done));
|
||||
}
|
||||
|
||||
const absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>>
|
||||
const absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>>
|
||||
&GcsActorManager::GetCreatedActors() const {
|
||||
return created_actors_;
|
||||
}
|
||||
@@ -1043,7 +1043,7 @@ const absl::flat_hash_map<ActorID, std::vector<RegisterActorCallback>>
|
||||
|
||||
void GcsActorManager::RemoveUnresolvedActor(const std::shared_ptr<GcsActor> &actor) {
|
||||
const auto &owner_address = actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
|
||||
auto iter = unresolved_actors_.find(node_id);
|
||||
if (iter != unresolved_actors_.end()) {
|
||||
|
||||
@@ -63,18 +63,18 @@ class GcsActor {
|
||||
actor_table_data_.set_state(rpc::ActorTableData::DEPENDENCIES_UNREADY);
|
||||
actor_table_data_.mutable_task_spec()->CopyFrom(task_spec);
|
||||
|
||||
actor_table_data_.mutable_address()->set_raylet_id(ClientID::Nil().Binary());
|
||||
actor_table_data_.mutable_address()->set_raylet_id(NodeID::Nil().Binary());
|
||||
actor_table_data_.mutable_address()->set_worker_id(WorkerID::Nil().Binary());
|
||||
}
|
||||
|
||||
/// Get the node id on which this actor is created.
|
||||
ClientID GetNodeID() const;
|
||||
NodeID GetNodeID() const;
|
||||
/// Get the id of the worker on which this actor is created.
|
||||
WorkerID GetWorkerID() const;
|
||||
/// Get the actor's owner ID.
|
||||
WorkerID GetOwnerID() const;
|
||||
/// Get the node ID of the actor's owner.
|
||||
ClientID GetOwnerNodeID() const;
|
||||
NodeID GetOwnerNodeID() const;
|
||||
/// Get the address of the actor's owner.
|
||||
const rpc::Address &GetOwnerAddress() const;
|
||||
|
||||
@@ -248,7 +248,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
|
||||
/// owned an actor, those actors will be destroyed.
|
||||
///
|
||||
/// \param node_id The specified node id.
|
||||
void OnNodeDead(const ClientID &node_id);
|
||||
void OnNodeDead(const NodeID &node_id);
|
||||
|
||||
/// Handle a worker failure. This will restart the associated actor, if any,
|
||||
/// which may be pending or already created. If the worker owned other
|
||||
@@ -258,7 +258,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
|
||||
/// \param worker_id ID of the dead worker.
|
||||
/// \param intentional_exit Whether the death was intentional. If yes and the
|
||||
/// worker was an actor, we should not attempt to restart the actor.
|
||||
void OnWorkerDead(const ClientID &node_id, const WorkerID &worker_id,
|
||||
void OnWorkerDead(const NodeID &node_id, const WorkerID &worker_id,
|
||||
bool intentional_exit = false);
|
||||
|
||||
/// Handle actor creation task failure. This should be called when scheduling
|
||||
@@ -288,7 +288,7 @@ class GcsActorManager : public rpc::ActorInfoHandler {
|
||||
/// Get the created actors.
|
||||
///
|
||||
/// \return The created actors.
|
||||
const absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>>
|
||||
const absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>>
|
||||
&GetCreatedActors() const;
|
||||
|
||||
const absl::flat_hash_map<ActorID, std::shared_ptr<GcsActor>> &GetRegisteredActors()
|
||||
@@ -323,11 +323,11 @@ class GcsActorManager : public rpc::ActorInfoHandler {
|
||||
|
||||
/// Get unresolved actors that were submitted from the specified node.
|
||||
absl::flat_hash_set<ActorID> GetUnresolvedActorsByOwnerNode(
|
||||
const ClientID &node_id) const;
|
||||
const NodeID &node_id) const;
|
||||
|
||||
/// Get unresolved actors that were submitted from the specified worker.
|
||||
absl::flat_hash_set<ActorID> GetUnresolvedActorsByOwnerWorker(
|
||||
const ClientID &node_id, const WorkerID &worker_id) const;
|
||||
const NodeID &node_id, const WorkerID &worker_id) const;
|
||||
|
||||
private:
|
||||
/// Reconstruct the specified actor.
|
||||
@@ -372,18 +372,17 @@ class GcsActorManager : public rpc::ActorInfoHandler {
|
||||
/// Maps from worker ID to a client and the IDs of the actors owned by that worker.
|
||||
/// The actor whose dependencies are not resolved should be destroyed once its creator
|
||||
/// dies.
|
||||
absl::flat_hash_map<ClientID,
|
||||
absl::flat_hash_map<WorkerID, absl::flat_hash_set<ActorID>>>
|
||||
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, absl::flat_hash_set<ActorID>>>
|
||||
unresolved_actors_;
|
||||
/// The pending actors which will not be scheduled until there's a resource change.
|
||||
std::vector<std::shared_ptr<GcsActor>> pending_actors_;
|
||||
/// Map contains the relationship of node and created actors. Each node ID
|
||||
/// maps to a map from worker ID to the actor created on that worker.
|
||||
absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, ActorID>> created_actors_;
|
||||
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, ActorID>> created_actors_;
|
||||
/// Map from worker ID to a client and the IDs of the actors owned by that
|
||||
/// worker. An owned actor should be destroyed once it has gone out of scope,
|
||||
/// according to its owner, or the owner dies.
|
||||
absl::flat_hash_map<ClientID, absl::flat_hash_map<WorkerID, Owner>> owners_;
|
||||
absl::flat_hash_map<NodeID, absl::flat_hash_map<WorkerID, Owner>> owners_;
|
||||
|
||||
/// The scheduler to schedule all registered actors.
|
||||
std::shared_ptr<gcs::GcsActorSchedulerInterface> gcs_actor_scheduler_;
|
||||
|
||||
@@ -89,7 +89,7 @@ void GcsActorScheduler::Reschedule(std::shared_ptr<GcsActor> actor) {
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ActorID> GcsActorScheduler::CancelOnNode(const ClientID &node_id) {
|
||||
std::vector<ActorID> GcsActorScheduler::CancelOnNode(const NodeID &node_id) {
|
||||
// Remove all the actors from the map associated with this node, and return them as they
|
||||
// will be reconstructed later.
|
||||
std::vector<ActorID> actor_ids;
|
||||
@@ -124,8 +124,7 @@ std::vector<ActorID> GcsActorScheduler::CancelOnNode(const ClientID &node_id) {
|
||||
return actor_ids;
|
||||
}
|
||||
|
||||
void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id,
|
||||
const ActorID &actor_id) {
|
||||
void GcsActorScheduler::CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) {
|
||||
// NOTE: This method does not currently cancel the outstanding lease request.
|
||||
// It only removes leasing information from the internal state so that
|
||||
// RequestWorkerLease ignores the response from raylet.
|
||||
@@ -134,7 +133,7 @@ void GcsActorScheduler::CancelOnLeasing(const ClientID &node_id,
|
||||
node_it->second.erase(actor_id);
|
||||
}
|
||||
|
||||
ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id,
|
||||
ActorID GcsActorScheduler::CancelOnWorker(const NodeID &node_id,
|
||||
const WorkerID &worker_id) {
|
||||
// Remove the worker from creating map and return ID of the actor associated with the
|
||||
// removed worker if exist, else return NilID.
|
||||
@@ -156,7 +155,7 @@ ActorID GcsActorScheduler::CancelOnWorker(const ClientID &node_id,
|
||||
}
|
||||
|
||||
void GcsActorScheduler::ReleaseUnusedWorkers(
|
||||
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) {
|
||||
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) {
|
||||
// The purpose of this function is to release leased workers that may be leaked.
|
||||
// When GCS restarts, it doesn't know which workers it has leased in the previous
|
||||
// lifecycle. In this case, GCS will send a list of worker ids that are still needed.
|
||||
@@ -192,7 +191,7 @@ void GcsActorScheduler::LeaseWorkerFromNode(std::shared_ptr<GcsActor> actor,
|
||||
std::shared_ptr<rpc::GcsNodeInfo> node) {
|
||||
RAY_CHECK(actor && node);
|
||||
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
RAY_LOG(INFO) << "Start leasing worker from node " << node_id << " for actor "
|
||||
<< actor->GetActorID();
|
||||
|
||||
@@ -277,7 +276,7 @@ void GcsActorScheduler::HandleWorkerLeasedReply(
|
||||
// The worker did not succeed in the lease, but the specified node returned a new
|
||||
// node, and then try again on the new node.
|
||||
RAY_CHECK(!retry_at_raylet_address.raylet_id().empty());
|
||||
auto spill_back_node_id = ClientID::FromBinary(retry_at_raylet_address.raylet_id());
|
||||
auto spill_back_node_id = NodeID::FromBinary(retry_at_raylet_address.raylet_id());
|
||||
if (auto spill_back_node = gcs_node_manager_.GetNode(spill_back_node_id)) {
|
||||
actor->UpdateAddress(retry_at_raylet_address);
|
||||
RAY_CHECK(node_to_actors_when_leasing_[actor->GetNodeID()]
|
||||
@@ -410,7 +409,7 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsActorScheduler::SelectNodeRandomly() const
|
||||
|
||||
std::shared_ptr<WorkerLeaseInterface> GcsActorScheduler::GetOrConnectLeaseClient(
|
||||
const rpc::Address &raylet_address) {
|
||||
auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
|
||||
auto iter = remote_lease_clients_.find(node_id);
|
||||
if (iter == remote_lease_clients_.end()) {
|
||||
auto lease_client = lease_client_factory_(raylet_address);
|
||||
|
||||
@@ -54,26 +54,26 @@ class GcsActorSchedulerInterface {
|
||||
///
|
||||
/// \param node_id ID of the node where the worker is located.
|
||||
/// \return ID list of actors associated with the specified node id.
|
||||
virtual std::vector<ActorID> CancelOnNode(const ClientID &node_id) = 0;
|
||||
virtual std::vector<ActorID> CancelOnNode(const NodeID &node_id) = 0;
|
||||
|
||||
/// Cancel an outstanding leasing request to raylets.
|
||||
///
|
||||
/// \param node_id ID of the node where the actor leasing request has been sent.
|
||||
/// \param actor_id ID of an actor.
|
||||
virtual void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) = 0;
|
||||
virtual void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) = 0;
|
||||
|
||||
/// Cancel the actor that is being scheduled to the specified worker.
|
||||
///
|
||||
/// \param node_id ID of the node where the worker is located.
|
||||
/// \param worker_id ID of the worker that the actor is creating on.
|
||||
/// \return ID of actor associated with the specified node id and worker id.
|
||||
virtual ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) = 0;
|
||||
virtual ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) = 0;
|
||||
|
||||
/// Notify raylets to release unused workers.
|
||||
///
|
||||
/// \param node_to_workers Workers used by each node.
|
||||
virtual void ReleaseUnusedWorkers(
|
||||
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) = 0;
|
||||
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) = 0;
|
||||
|
||||
virtual ~GcsActorSchedulerInterface() {}
|
||||
};
|
||||
@@ -120,7 +120,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
|
||||
///
|
||||
/// \param node_id ID of the node where the worker is located.
|
||||
/// \return ID list of actors associated with the specified node id.
|
||||
std::vector<ActorID> CancelOnNode(const ClientID &node_id) override;
|
||||
std::vector<ActorID> CancelOnNode(const NodeID &node_id) override;
|
||||
|
||||
/// Cancel an outstanding leasing request to raylets.
|
||||
///
|
||||
@@ -130,20 +130,20 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
|
||||
///
|
||||
/// \param node_id ID of the node where the actor leasing request has been sent.
|
||||
/// \param actor_id ID of an actor.
|
||||
void CancelOnLeasing(const ClientID &node_id, const ActorID &actor_id) override;
|
||||
void CancelOnLeasing(const NodeID &node_id, const ActorID &actor_id) override;
|
||||
|
||||
/// Cancel the actor that is being scheduled to the specified worker.
|
||||
///
|
||||
/// \param node_id ID of the node where the worker is located.
|
||||
/// \param worker_id ID of the worker that the actor is creating on.
|
||||
/// \return ID of actor associated with the specified node id and worker id.
|
||||
ActorID CancelOnWorker(const ClientID &node_id, const WorkerID &worker_id) override;
|
||||
ActorID CancelOnWorker(const NodeID &node_id, const WorkerID &worker_id) override;
|
||||
|
||||
/// Notify raylets to release unused workers.
|
||||
///
|
||||
/// \param node_to_workers Workers used by each node.
|
||||
void ReleaseUnusedWorkers(const std::unordered_map<ClientID, std::vector<WorkerID>>
|
||||
&node_to_workers) override;
|
||||
void ReleaseUnusedWorkers(
|
||||
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) override;
|
||||
|
||||
protected:
|
||||
/// The GcsLeasedWorker is kind of abstraction of remote leased worker inside raylet. It
|
||||
@@ -178,7 +178,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
|
||||
WorkerID GetWorkerID() const { return WorkerID::FromBinary(address_.worker_id()); }
|
||||
|
||||
/// Get the NodeID of this leased worker.
|
||||
ClientID GetNodeID() const { return ClientID::FromBinary(address_.raylet_id()); }
|
||||
NodeID GetNodeID() const { return NodeID::FromBinary(address_.raylet_id()); }
|
||||
|
||||
/// Get the id of the actor which is assigned to this leased worker.
|
||||
ActorID GetAssignedActorID() const { return assigned_actor_id_; }
|
||||
@@ -269,15 +269,14 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
|
||||
/// Map from node ID to the set of actors for whom we are trying to acquire a lease from
|
||||
/// that node. This is needed so that we can retry lease requests from the node until we
|
||||
/// receive a reply or the node is removed.
|
||||
absl::flat_hash_map<ClientID, absl::flat_hash_set<ActorID>>
|
||||
node_to_actors_when_leasing_;
|
||||
absl::flat_hash_map<NodeID, absl::flat_hash_set<ActorID>> node_to_actors_when_leasing_;
|
||||
/// Map from node ID to the workers on which we are trying to create actors. This is
|
||||
/// needed so that we can cancel actor creation requests if the worker is removed.
|
||||
absl::flat_hash_map<ClientID,
|
||||
absl::flat_hash_map<NodeID,
|
||||
absl::flat_hash_map<WorkerID, std::shared_ptr<GcsLeasedWorker>>>
|
||||
node_to_workers_when_creating_;
|
||||
/// The cached node clients which are used to communicate with raylet to lease workers.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<WorkerLeaseInterface>>
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<WorkerLeaseInterface>>
|
||||
remote_lease_clients_;
|
||||
/// Reference of GcsNodeManager.
|
||||
const GcsNodeManager &gcs_node_manager_;
|
||||
@@ -290,7 +289,7 @@ class GcsActorScheduler : public GcsActorSchedulerInterface {
|
||||
/// Factory for producing new clients to request leases from remote nodes.
|
||||
LeaseClientFactoryFn lease_client_factory_;
|
||||
/// The nodes which are releasing unused workers.
|
||||
absl::flat_hash_set<ClientID> nodes_of_releasing_unused_workers_;
|
||||
absl::flat_hash_set<NodeID> nodes_of_releasing_unused_workers_;
|
||||
/// The cached core worker clients which are used to communicate with leased worker.
|
||||
rpc::CoreWorkerClientPool core_worker_clients_;
|
||||
};
|
||||
|
||||
@@ -25,7 +25,7 @@ GcsNodeManager::NodeFailureDetector::NodeFailureDetector(
|
||||
boost::asio::io_service &io_service,
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage,
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub,
|
||||
std::function<void(const ClientID &)> on_node_death_callback)
|
||||
std::function<void(const NodeID &)> on_node_death_callback)
|
||||
: gcs_table_storage_(std::move(gcs_table_storage)),
|
||||
on_node_death_callback_(std::move(on_node_death_callback)),
|
||||
num_heartbeats_timeout_(RayConfig::instance().num_heartbeats_timeout()),
|
||||
@@ -40,12 +40,12 @@ void GcsNodeManager::NodeFailureDetector::Start() {
|
||||
}
|
||||
}
|
||||
|
||||
void GcsNodeManager::NodeFailureDetector::AddNode(const ray::ClientID &node_id) {
|
||||
void GcsNodeManager::NodeFailureDetector::AddNode(const ray::NodeID &node_id) {
|
||||
heartbeats_.emplace(node_id, num_heartbeats_timeout_);
|
||||
}
|
||||
|
||||
void GcsNodeManager::NodeFailureDetector::HandleHeartbeat(
|
||||
const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat_data) {
|
||||
const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat_data) {
|
||||
auto iter = heartbeats_.find(node_id);
|
||||
if (iter == heartbeats_.end()) {
|
||||
// Ignore this heartbeat as the node is not registered.
|
||||
@@ -145,7 +145,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service,
|
||||
: main_io_service_(main_io_service),
|
||||
node_failure_detector_(new NodeFailureDetector(
|
||||
node_failure_detector_io_service, gcs_table_storage, gcs_pub_sub,
|
||||
[this](const ClientID &node_id) {
|
||||
[this](const NodeID &node_id) {
|
||||
// Post this to main event loop to avoid potential concurrency issues.
|
||||
main_io_service_.post([this, node_id] {
|
||||
if (auto node = RemoveNode(node_id, /* is_intended = */ false)) {
|
||||
@@ -171,7 +171,7 @@ GcsNodeManager::GcsNodeManager(boost::asio::io_service &main_io_service,
|
||||
void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request,
|
||||
rpc::RegisterNodeReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.node_info().node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_info().node_id());
|
||||
RAY_LOG(INFO) << "Registering node info, node id = " << node_id
|
||||
<< ", address = " << request.node_info().node_manager_address();
|
||||
AddNode(std::make_shared<rpc::GcsNodeInfo>(request.node_info()));
|
||||
@@ -191,7 +191,7 @@ void GcsNodeManager::HandleRegisterNode(const rpc::RegisterNodeRequest &request,
|
||||
void GcsNodeManager::HandleUnregisterNode(const rpc::UnregisterNodeRequest &request,
|
||||
rpc::UnregisterNodeReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
RAY_LOG(INFO) << "Unregistering node info, node id = " << node_id;
|
||||
if (auto node = RemoveNode(node_id, /* is_intended = */ true)) {
|
||||
node->set_state(rpc::GcsNodeInfo::DEAD);
|
||||
@@ -228,7 +228,7 @@ void GcsNodeManager::HandleGetAllNodeInfo(const rpc::GetAllNodeInfoRequest &requ
|
||||
void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &request,
|
||||
rpc::ReportHeartbeatReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.heartbeat().client_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.heartbeat().client_id());
|
||||
auto heartbeat_data = std::make_shared<rpc::HeartbeatTableData>();
|
||||
heartbeat_data->CopyFrom(request.heartbeat());
|
||||
|
||||
@@ -248,7 +248,7 @@ void GcsNodeManager::HandleReportHeartbeat(const rpc::ReportHeartbeatRequest &re
|
||||
void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request,
|
||||
rpc::GetResourcesReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
auto iter = cluster_resources_.find(node_id);
|
||||
if (iter != cluster_resources_.end()) {
|
||||
for (auto &resource : iter->second.items()) {
|
||||
@@ -261,7 +261,7 @@ void GcsNodeManager::HandleGetResources(const rpc::GetResourcesRequest &request,
|
||||
void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &request,
|
||||
rpc::UpdateResourcesReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
RAY_LOG(DEBUG) << "Updating resources, node id = " << node_id;
|
||||
auto iter = cluster_resources_.find(node_id);
|
||||
auto to_be_updated_resources = request.resources();
|
||||
@@ -298,7 +298,7 @@ void GcsNodeManager::HandleUpdateResources(const rpc::UpdateResourcesRequest &re
|
||||
void GcsNodeManager::HandleDeleteResources(const rpc::DeleteResourcesRequest &request,
|
||||
rpc::DeleteResourcesReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
RAY_LOG(DEBUG) << "Deleting node resources, node id = " << node_id;
|
||||
auto resource_names = VectorFromProtobuf(request.resource_name_list());
|
||||
auto iter = cluster_resources_.find(node_id);
|
||||
@@ -355,7 +355,7 @@ void GcsNodeManager::HandleGetInternalConfig(const rpc::GetInternalConfigRequest
|
||||
}
|
||||
|
||||
std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::GetNode(
|
||||
const ray::ClientID &node_id) const {
|
||||
const ray::NodeID &node_id) const {
|
||||
auto iter = alive_nodes_.find(node_id);
|
||||
if (iter == alive_nodes_.end()) {
|
||||
return nullptr;
|
||||
@@ -365,7 +365,7 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::GetNode(
|
||||
}
|
||||
|
||||
void GcsNodeManager::AddNode(std::shared_ptr<rpc::GcsNodeInfo> node) {
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
auto iter = alive_nodes_.find(node_id);
|
||||
if (iter == alive_nodes_.end()) {
|
||||
alive_nodes_.emplace(node_id, node);
|
||||
@@ -385,7 +385,7 @@ void GcsNodeManager::AddNode(std::shared_ptr<rpc::GcsNodeInfo> node) {
|
||||
}
|
||||
|
||||
std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::RemoveNode(
|
||||
const ray::ClientID &node_id, bool is_intended /*= false*/) {
|
||||
const ray::NodeID &node_id, bool is_intended /*= false*/) {
|
||||
RAY_LOG(INFO) << "Removing node, node id = " << node_id;
|
||||
std::shared_ptr<rpc::GcsNodeInfo> removed_node;
|
||||
auto iter = alive_nodes_.find(node_id);
|
||||
@@ -423,8 +423,8 @@ std::shared_ptr<rpc::GcsNodeInfo> GcsNodeManager::RemoveNode(
|
||||
void GcsNodeManager::LoadInitialData(const EmptyCallback &done) {
|
||||
RAY_LOG(INFO) << "Loading initial data.";
|
||||
|
||||
auto get_node_callback = [this, done](
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &result) {
|
||||
auto get_node_callback = [this,
|
||||
done](const std::unordered_map<NodeID, GcsNodeInfo> &result) {
|
||||
for (auto &item : result) {
|
||||
if (item.second.state() == rpc::GcsNodeInfo::ALIVE) {
|
||||
// Call `AddNode` for this node to make sure it is tracked by the failure
|
||||
@@ -436,7 +436,7 @@ void GcsNodeManager::LoadInitialData(const EmptyCallback &done) {
|
||||
}
|
||||
|
||||
auto get_node_resource_callback =
|
||||
[this, done](const std::unordered_map<ClientID, ResourceMap> &result) {
|
||||
[this, done](const std::unordered_map<NodeID, ResourceMap> &result) {
|
||||
for (auto &item : result) {
|
||||
if (alive_nodes_.count(item.first)) {
|
||||
cluster_resources_[item.first] = item.second;
|
||||
@@ -458,13 +458,13 @@ void GcsNodeManager::StartNodeFailureDetector() {
|
||||
}
|
||||
|
||||
void GcsNodeManager::UpdateNodeRealtimeResources(
|
||||
const ClientID &node_id, const rpc::HeartbeatTableData &heartbeat) {
|
||||
const NodeID &node_id, const rpc::HeartbeatTableData &heartbeat) {
|
||||
auto resources_available = MapFromProtobuf(heartbeat.resources_available());
|
||||
cluster_realtime_resources_[node_id] =
|
||||
std::make_shared<ResourceSet>(resources_available);
|
||||
}
|
||||
|
||||
const absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>>
|
||||
const absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>>
|
||||
&GcsNodeManager::GetClusterRealtimeResources() const {
|
||||
return cluster_realtime_resources_;
|
||||
}
|
||||
|
||||
@@ -99,20 +99,20 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
/// \param node_id The ID of the node to be removed.
|
||||
/// \param is_intended False if this is triggered by `node_failure_detector_`, else
|
||||
/// True.
|
||||
std::shared_ptr<rpc::GcsNodeInfo> RemoveNode(const ClientID &node_id,
|
||||
std::shared_ptr<rpc::GcsNodeInfo> RemoveNode(const NodeID &node_id,
|
||||
bool is_intended = false);
|
||||
|
||||
/// Get alive node by ID.
|
||||
///
|
||||
/// \param node_id The id of the node.
|
||||
/// \return the node if it is alive else return nullptr.
|
||||
std::shared_ptr<rpc::GcsNodeInfo> GetNode(const ClientID &node_id) const;
|
||||
std::shared_ptr<rpc::GcsNodeInfo> GetNode(const NodeID &node_id) const;
|
||||
|
||||
/// Get all alive nodes.
|
||||
///
|
||||
/// \return all alive nodes.
|
||||
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>>
|
||||
&GetAllAliveNodes() const {
|
||||
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &GetAllAliveNodes()
|
||||
const {
|
||||
return alive_nodes_;
|
||||
}
|
||||
|
||||
@@ -144,11 +144,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
void StartNodeFailureDetector();
|
||||
|
||||
// Update node realtime resources.
|
||||
void UpdateNodeRealtimeResources(const ClientID &node_id,
|
||||
void UpdateNodeRealtimeResources(const NodeID &node_id,
|
||||
const rpc::HeartbeatTableData &heartbeat);
|
||||
|
||||
/// Get cluster realtime resources.
|
||||
const absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>>
|
||||
const absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>>
|
||||
&GetClusterRealtimeResources() const;
|
||||
|
||||
protected:
|
||||
@@ -165,7 +165,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
boost::asio::io_service &io_service,
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage,
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub,
|
||||
std::function<void(const ClientID &)> on_node_death_callback);
|
||||
std::function<void(const NodeID &)> on_node_death_callback);
|
||||
|
||||
// Note: To avoid heartbeats being delayed by main thread, all public methods below
|
||||
// should be posted to its own IO service.
|
||||
@@ -177,13 +177,13 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
/// Only if the node has registered, its heartbeat data will be accepted.
|
||||
///
|
||||
/// \param node_id ID of the node to be registered.
|
||||
void AddNode(const ClientID &node_id);
|
||||
void AddNode(const NodeID &node_id);
|
||||
|
||||
/// Handle a heartbeat from a Raylet.
|
||||
///
|
||||
/// \param node_id The node ID of the Raylet that sent the heartbeat.
|
||||
/// \param heartbeat_data The heartbeat sent by the Raylet.
|
||||
void HandleHeartbeat(const ClientID &node_id,
|
||||
void HandleHeartbeat(const NodeID &node_id,
|
||||
const rpc::HeartbeatTableData &heartbeat_data);
|
||||
|
||||
protected:
|
||||
@@ -206,7 +206,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
/// Storage for GCS tables.
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
|
||||
/// The callback of node death.
|
||||
std::function<void(const ClientID &)> on_node_death_callback_;
|
||||
std::function<void(const NodeID &)> on_node_death_callback_;
|
||||
/// The number of heartbeats that can be missed before a node is removed.
|
||||
int64_t num_heartbeats_timeout_;
|
||||
// Only the changed part will be included in heartbeat if this is true.
|
||||
@@ -215,9 +215,9 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
boost::asio::deadline_timer detect_timer_;
|
||||
/// For each Raylet that we receive a heartbeat from, the number of ticks
|
||||
/// that may pass before the Raylet will be declared dead.
|
||||
absl::flat_hash_map<ClientID, int64_t> heartbeats_;
|
||||
absl::flat_hash_map<NodeID, int64_t> heartbeats_;
|
||||
/// A buffer containing heartbeats received from node managers in the last tick.
|
||||
absl::flat_hash_map<ClientID, rpc::HeartbeatTableData> heartbeat_buffer_;
|
||||
absl::flat_hash_map<NodeID, rpc::HeartbeatTableData> heartbeat_buffer_;
|
||||
/// A publisher for publishing gcs messages.
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
|
||||
/// Is the detect started.
|
||||
@@ -232,11 +232,11 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
/// The event loop for node failure detector.
|
||||
boost::asio::io_service &node_failure_detector_service_;
|
||||
/// Alive nodes.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> alive_nodes_;
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> alive_nodes_;
|
||||
/// Dead nodes.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> dead_nodes_;
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> dead_nodes_;
|
||||
/// Cluster resources.
|
||||
absl::flat_hash_map<ClientID, rpc::ResourceMap> cluster_resources_;
|
||||
absl::flat_hash_map<NodeID, rpc::ResourceMap> cluster_resources_;
|
||||
/// Listeners which monitor the addition of nodes.
|
||||
std::vector<std::function<void(std::shared_ptr<rpc::GcsNodeInfo>)>>
|
||||
node_added_listeners_;
|
||||
@@ -248,7 +248,7 @@ class GcsNodeManager : public rpc::NodeInfoHandler {
|
||||
/// Storage for GCS tables.
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
|
||||
/// Cluster realtime resources.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<ResourceSet>> cluster_realtime_resources_;
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<ResourceSet>> cluster_realtime_resources_;
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
@@ -60,7 +60,7 @@ void GcsObjectManager::HandleAddObjectLocation(
|
||||
const rpc::AddObjectLocationRequest &request, rpc::AddObjectLocationReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ObjectID object_id = ObjectID::FromBinary(request.object_id());
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
RAY_LOG(DEBUG) << "Adding object location, job id = " << object_id.TaskId().JobId()
|
||||
<< ", object id = " << object_id << ", node id = " << node_id;
|
||||
AddObjectLocationInCache(object_id, node_id);
|
||||
@@ -100,7 +100,7 @@ void GcsObjectManager::HandleRemoveObjectLocation(
|
||||
const rpc::RemoveObjectLocationRequest &request,
|
||||
rpc::RemoveObjectLocationReply *reply, rpc::SendReplyCallback send_reply_callback) {
|
||||
ObjectID object_id = ObjectID::FromBinary(request.object_id());
|
||||
ClientID node_id = ClientID::FromBinary(request.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.node_id());
|
||||
RAY_LOG(DEBUG) << "Removing object location, job id = " << object_id.TaskId().JobId()
|
||||
<< ", object id = " << object_id << ", node id = " << node_id;
|
||||
RemoveObjectLocationInCache(object_id, node_id);
|
||||
@@ -143,7 +143,7 @@ void GcsObjectManager::HandleRemoveObjectLocation(
|
||||
}
|
||||
|
||||
void GcsObjectManager::AddObjectsLocation(
|
||||
const ClientID &node_id, const absl::flat_hash_set<ObjectID> &object_ids) {
|
||||
const NodeID &node_id, const absl::flat_hash_set<ObjectID> &object_ids) {
|
||||
// TODO(micafan) Optimize the lock when necessary.
|
||||
// Maybe use read/write lock. Or reduce the granularity of the lock.
|
||||
absl::MutexLock lock(&mutex_);
|
||||
@@ -159,7 +159,7 @@ void GcsObjectManager::AddObjectsLocation(
|
||||
}
|
||||
|
||||
void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id,
|
||||
const ClientID &node_id) {
|
||||
const NodeID &node_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
auto *objects_on_node = GetObjectSetByNode(node_id, /* create_if_not_exist */ true);
|
||||
@@ -170,7 +170,7 @@ void GcsObjectManager::AddObjectLocationInCache(const ObjectID &object_id,
|
||||
object_locations->emplace(node_id);
|
||||
}
|
||||
|
||||
absl::flat_hash_set<ClientID> GcsObjectManager::GetObjectLocations(
|
||||
absl::flat_hash_set<NodeID> GcsObjectManager::GetObjectLocations(
|
||||
const ObjectID &object_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
@@ -178,10 +178,10 @@ absl::flat_hash_set<ClientID> GcsObjectManager::GetObjectLocations(
|
||||
if (object_locations) {
|
||||
return *object_locations;
|
||||
}
|
||||
return absl::flat_hash_set<ClientID>{};
|
||||
return absl::flat_hash_set<NodeID>{};
|
||||
}
|
||||
|
||||
void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) {
|
||||
void GcsObjectManager::OnNodeRemoved(const NodeID &node_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
ObjectSet objects_on_node;
|
||||
@@ -207,7 +207,7 @@ void GcsObjectManager::OnNodeRemoved(const ClientID &node_id) {
|
||||
}
|
||||
|
||||
void GcsObjectManager::RemoveObjectLocationInCache(const ObjectID &object_id,
|
||||
const ClientID &node_id) {
|
||||
const NodeID &node_id) {
|
||||
absl::MutexLock lock(&mutex_);
|
||||
|
||||
auto *object_locations = GetObjectLocationSet(object_id);
|
||||
@@ -244,7 +244,7 @@ GcsObjectManager::LocationSet *GcsObjectManager::GetObjectLocationSet(
|
||||
}
|
||||
|
||||
GcsObjectManager::ObjectSet *GcsObjectManager::GetObjectSetByNode(
|
||||
const ClientID &node_id, bool create_if_not_exist) {
|
||||
const NodeID &node_id, bool create_if_not_exist) {
|
||||
ObjectSet *objects_on_node = nullptr;
|
||||
|
||||
auto it = node_to_objects_.find(node_id);
|
||||
@@ -271,11 +271,11 @@ void GcsObjectManager::LoadInitialData(const EmptyCallback &done) {
|
||||
RAY_LOG(INFO) << "Loading initial data.";
|
||||
auto callback = [this, done](
|
||||
const std::unordered_map<ObjectID, ObjectTableDataList> &result) {
|
||||
absl::flat_hash_map<ClientID, ObjectSet> node_to_objects;
|
||||
absl::flat_hash_map<NodeID, ObjectSet> node_to_objects;
|
||||
for (auto &item : result) {
|
||||
auto object_list = item.second;
|
||||
for (int index = 0; index < object_list.items_size(); ++index) {
|
||||
node_to_objects[ClientID::FromBinary(object_list.items(index).manager())].insert(
|
||||
node_to_objects[NodeID::FromBinary(object_list.items(index).manager())].insert(
|
||||
item.first);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
[this](const std::shared_ptr<rpc::GcsNodeInfo> &node) {
|
||||
// All of the related actors should be reconstructed when a node is removed from
|
||||
// the GCS.
|
||||
OnNodeRemoved(ClientID::FromBinary(node->node_id()));
|
||||
OnNodeRemoved(NodeID::FromBinary(node->node_id()));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -60,14 +60,14 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
void LoadInitialData(const EmptyCallback &done);
|
||||
|
||||
protected:
|
||||
typedef absl::flat_hash_set<ClientID> LocationSet;
|
||||
typedef absl::flat_hash_set<NodeID> LocationSet;
|
||||
|
||||
/// Add a location of objects.
|
||||
/// If the GCS server restarts, this function is used to reload data from storage.
|
||||
///
|
||||
/// \param node_id The object location that will be added.
|
||||
/// \param object_ids The ids of the objects whose location will be added.
|
||||
void AddObjectsLocation(const ClientID &node_id,
|
||||
void AddObjectsLocation(const NodeID &node_id,
|
||||
const absl::flat_hash_set<ObjectID> &object_ids)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
@@ -75,7 +75,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
///
|
||||
/// \param object_id The id of object.
|
||||
/// \param node_id The node id of the new location.
|
||||
void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id)
|
||||
void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Get all locations of the given object.
|
||||
@@ -87,13 +87,13 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
/// Handler if a node is removed.
|
||||
///
|
||||
/// \param node_id The node that will be removed.
|
||||
void OnNodeRemoved(const ClientID &node_id) LOCKS_EXCLUDED(mutex_);
|
||||
void OnNodeRemoved(const NodeID &node_id) LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
/// Remove object's location.
|
||||
///
|
||||
/// \param object_id The id of the object whose location will be removed.
|
||||
/// \param node_id The location that will be removed.
|
||||
void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id)
|
||||
void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id)
|
||||
LOCKS_EXCLUDED(mutex_);
|
||||
|
||||
private:
|
||||
@@ -118,7 +118,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
/// \param node_id The id of node to lookup.
|
||||
/// \param create_if_not_exist Whether to create a new one if not exist.
|
||||
/// \return ObjectSet *
|
||||
GcsObjectManager::ObjectSet *GetObjectSetByNode(const ClientID &node_id,
|
||||
GcsObjectManager::ObjectSet *GetObjectSetByNode(const NodeID &node_id,
|
||||
bool create_if_not_exist = false)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
|
||||
|
||||
@@ -130,7 +130,7 @@ class GcsObjectManager : public rpc::ObjectInfoHandler {
|
||||
|
||||
/// Mapping from node id to the objects that are held by the node.
|
||||
/// This is the local cache of nodes' objects in the storage.
|
||||
absl::flat_hash_map<ClientID, ObjectSet> node_to_objects_ GUARDED_BY(mutex_);
|
||||
absl::flat_hash_map<NodeID, ObjectSet> node_to_objects_ GUARDED_BY(mutex_);
|
||||
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
|
||||
|
||||
@@ -53,7 +53,7 @@ std::vector<std::shared_ptr<BundleSpecification>> GcsPlacementGroup::GetUnplaced
|
||||
const auto &bundles = placement_group_table_data_.bundles();
|
||||
std::vector<std::shared_ptr<BundleSpecification>> unplaced_bundles;
|
||||
for (auto &bundle : bundles) {
|
||||
if (ClientID::FromBinary(bundle.node_id()).IsNil()) {
|
||||
if (NodeID::FromBinary(bundle.node_id()).IsNil()) {
|
||||
unplaced_bundles.push_back(std::make_shared<BundleSpecification>(bundle));
|
||||
}
|
||||
}
|
||||
@@ -326,7 +326,7 @@ void GcsPlacementGroupManager::RetryCreatingPlacementGroup() {
|
||||
RayConfig::instance().gcs_create_placement_group_retry_interval_ms());
|
||||
}
|
||||
|
||||
void GcsPlacementGroupManager::OnNodeDead(const ClientID &node_id) {
|
||||
void GcsPlacementGroupManager::OnNodeDead(const NodeID &node_id) {
|
||||
RAY_LOG(WARNING) << "Node " << node_id
|
||||
<< " failed, rescheduling the placement groups on the dead node.";
|
||||
auto bundles = gcs_placement_group_scheduler_->GetBundlesOnNode(node_id);
|
||||
|
||||
@@ -167,7 +167,7 @@ class GcsPlacementGroupManager : public rpc::PlacementGroupInfoHandler {
|
||||
/// specified node id.
|
||||
///
|
||||
/// \param node_id The specified node id.
|
||||
void OnNodeDead(const ClientID &node_id);
|
||||
void OnNodeDead(const NodeID &node_id);
|
||||
|
||||
private:
|
||||
/// Try to create placement group after a short time.
|
||||
|
||||
@@ -46,7 +46,7 @@ ScheduleMap GcsStrictPackStrategy::Schedule(
|
||||
|
||||
// Filter candidate nodes.
|
||||
const auto &alive_nodes = context->node_manager_.GetClusterRealtimeResources();
|
||||
std::vector<std::pair<int64_t, ClientID>> candidate_nodes;
|
||||
std::vector<std::pair<int64_t, NodeID>> candidate_nodes;
|
||||
for (auto &node : alive_nodes) {
|
||||
if (required_resources.IsSubset(*node.second)) {
|
||||
candidate_nodes.emplace_back((*context->node_to_bundles_)[node.first], node.first);
|
||||
@@ -61,8 +61,8 @@ ScheduleMap GcsStrictPackStrategy::Schedule(
|
||||
|
||||
std::sort(
|
||||
std::begin(candidate_nodes), std::end(candidate_nodes),
|
||||
[](const std::pair<int64_t, ClientID> &left,
|
||||
const std::pair<int64_t, ClientID> &right) { return left.first < right.first; });
|
||||
[](const std::pair<int64_t, NodeID> &left,
|
||||
const std::pair<int64_t, NodeID> &right) { return left.first < right.first; });
|
||||
|
||||
for (auto &bundle : bundles) {
|
||||
schedule_map[bundle->BundleId()] = candidate_nodes.front().second;
|
||||
@@ -287,7 +287,7 @@ void GcsPlacementGroupScheduler::PrepareResources(
|
||||
const std::shared_ptr<BundleSpecification> &bundle,
|
||||
const std::shared_ptr<ray::rpc::GcsNodeInfo> &node, const StatusCallback &callback) {
|
||||
const auto lease_client = GetLeaseClientFromNode(node);
|
||||
const auto node_id = ClientID::FromBinary(node->node_id());
|
||||
const auto node_id = NodeID::FromBinary(node->node_id());
|
||||
RAY_LOG(INFO) << "Preparing resource from node " << node_id
|
||||
<< " for a bundle: " << bundle->DebugString();
|
||||
lease_client->PrepareBundleResources(
|
||||
@@ -311,7 +311,7 @@ void GcsPlacementGroupScheduler::CommitResources(
|
||||
const std::shared_ptr<ray::rpc::GcsNodeInfo> &node, const StatusCallback callback) {
|
||||
RAY_CHECK(node != nullptr);
|
||||
const auto lease_client = GetLeaseClientFromNode(node);
|
||||
const auto node_id = ClientID::FromBinary(node->node_id());
|
||||
const auto node_id = NodeID::FromBinary(node->node_id());
|
||||
RAY_LOG(INFO) << "Committing resource to a node " << node_id
|
||||
<< " for a bundle: " << bundle->DebugString();
|
||||
lease_client->CommitBundleResources(
|
||||
@@ -339,7 +339,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve(
|
||||
<< " has already removed. Cancellation request will be ignored.";
|
||||
return;
|
||||
}
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
RAY_LOG(INFO) << "Cancelling the resource reserved for bundle: "
|
||||
<< bundle_spec->DebugString() << " at node " << node_id;
|
||||
const auto return_client = GetLeaseClientFromNode(node);
|
||||
@@ -353,7 +353,7 @@ void GcsPlacementGroupScheduler::CancelResourceReserve(
|
||||
|
||||
std::shared_ptr<ResourceReserveInterface>
|
||||
GcsPlacementGroupScheduler::GetOrConnectLeaseClient(const rpc::Address &raylet_address) {
|
||||
auto node_id = ClientID::FromBinary(raylet_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(raylet_address.raylet_id());
|
||||
auto iter = remote_lease_clients_.find(node_id);
|
||||
if (iter == remote_lease_clients_.end()) {
|
||||
auto lease_client = lease_client_factory_(raylet_address);
|
||||
@@ -499,7 +499,7 @@ std::unique_ptr<ScheduleContext> GcsPlacementGroupScheduler::GetScheduleContext(
|
||||
auto &alive_nodes = gcs_node_manager_.GetAllAliveNodes();
|
||||
committed_bundle_location_index_.AddNodes(alive_nodes);
|
||||
|
||||
auto node_to_bundles = std::make_shared<absl::flat_hash_map<ClientID, int64_t>>();
|
||||
auto node_to_bundles = std::make_shared<absl::flat_hash_map<NodeID, int64_t>>();
|
||||
for (const auto &node_it : alive_nodes) {
|
||||
const auto &node_id = node_it.first;
|
||||
const auto &bundle_locations_on_node =
|
||||
@@ -517,7 +517,7 @@ std::unique_ptr<ScheduleContext> GcsPlacementGroupScheduler::GetScheduleContext(
|
||||
}
|
||||
|
||||
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>>
|
||||
GcsPlacementGroupScheduler::GetBundlesOnNode(const ClientID &node_id) {
|
||||
GcsPlacementGroupScheduler::GetBundlesOnNode(const NodeID &node_id) {
|
||||
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> bundles_on_node;
|
||||
const auto &maybe_bundle_locations =
|
||||
committed_bundle_location_index_.GetBundleLocationsOnNode(node_id);
|
||||
@@ -546,7 +546,7 @@ void BundleLocationIndex::AddBundleLocations(
|
||||
}
|
||||
}
|
||||
|
||||
bool BundleLocationIndex::Erase(const ClientID &node_id) {
|
||||
bool BundleLocationIndex::Erase(const NodeID &node_id) {
|
||||
const auto leased_bundles_it = node_to_leased_bundles_.find(node_id);
|
||||
if (leased_bundles_it == node_to_leased_bundles_.end()) {
|
||||
return false;
|
||||
@@ -604,7 +604,7 @@ BundleLocationIndex::GetBundleLocations(const PlacementGroupID &placement_group_
|
||||
}
|
||||
|
||||
const absl::optional<std::shared_ptr<BundleLocations> const>
|
||||
BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) {
|
||||
BundleLocationIndex::GetBundleLocationsOnNode(const NodeID &node_id) {
|
||||
auto it = node_to_leased_bundles_.find(node_id);
|
||||
if (it == node_to_leased_bundles_.end()) {
|
||||
return {};
|
||||
@@ -613,7 +613,7 @@ BundleLocationIndex::GetBundleLocationsOnNode(const ClientID &node_id) {
|
||||
}
|
||||
|
||||
void BundleLocationIndex::AddNodes(
|
||||
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes) {
|
||||
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes) {
|
||||
for (const auto &iter : nodes) {
|
||||
if (!node_to_leased_bundles_.contains(iter.first)) {
|
||||
node_to_leased_bundles_[iter.first] = std::make_shared<BundleLocations>();
|
||||
@@ -630,13 +630,13 @@ LeaseStatusTracker::LeaseStatusTracker(
|
||||
}
|
||||
|
||||
bool LeaseStatusTracker::MarkPreparePhaseStarted(
|
||||
const ClientID &node_id, std::shared_ptr<BundleSpecification> bundle) {
|
||||
const NodeID &node_id, std::shared_ptr<BundleSpecification> bundle) {
|
||||
const auto &bundle_id = bundle->BundleId();
|
||||
return node_to_bundles_when_preparing_[node_id].emplace(bundle_id).second;
|
||||
}
|
||||
|
||||
void LeaseStatusTracker::MarkPrepareRequestReturned(
|
||||
const ClientID &node_id, const std::shared_ptr<BundleSpecification> bundle,
|
||||
const NodeID &node_id, const std::shared_ptr<BundleSpecification> bundle,
|
||||
const Status &status) {
|
||||
RAY_CHECK(prepare_request_returned_count_ <= bundles_to_schedule_.size());
|
||||
auto leasing_bundles = node_to_bundles_when_preparing_.find(node_id);
|
||||
@@ -670,7 +670,7 @@ bool LeaseStatusTracker::AllPrepareRequestsSuccessful() const {
|
||||
}
|
||||
|
||||
void LeaseStatusTracker::MarkCommitRequestReturned(
|
||||
const ClientID &node_id, const std::shared_ptr<BundleSpecification> bundle,
|
||||
const NodeID &node_id, const std::shared_ptr<BundleSpecification> bundle,
|
||||
const Status &status) {
|
||||
commit_request_returned_count_ += 1;
|
||||
// If the request succeeds, record it.
|
||||
|
||||
@@ -37,9 +37,10 @@ struct pair_hash {
|
||||
return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
|
||||
}
|
||||
};
|
||||
using ScheduleMap = std::unordered_map<BundleID, ClientID, pair_hash>;
|
||||
using BundleLocations = absl::flat_hash_map<
|
||||
BundleID, std::pair<ClientID, std::shared_ptr<BundleSpecification>>, pair_hash>;
|
||||
using ScheduleMap = std::unordered_map<BundleID, NodeID, pair_hash>;
|
||||
using BundleLocations =
|
||||
absl::flat_hash_map<BundleID, std::pair<NodeID, std::shared_ptr<BundleSpecification>>,
|
||||
pair_hash>;
|
||||
|
||||
class GcsPlacementGroup;
|
||||
|
||||
@@ -60,7 +61,7 @@ class GcsPlacementGroupSchedulerInterface {
|
||||
/// \param node_id ID of the dead node.
|
||||
/// \return The bundles belonging to the dead node.
|
||||
virtual absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
|
||||
const ClientID &node_id) = 0;
|
||||
const NodeID &node_id) = 0;
|
||||
|
||||
/// Destroy bundle resources from all nodes in the placement group.
|
||||
virtual void DestroyPlacementGroupBundleResourcesIfExists(
|
||||
@@ -79,7 +80,7 @@ class GcsPlacementGroupSchedulerInterface {
|
||||
/// ScheduleContext provides the information needed for bundle scheduling decisions.
|
||||
class ScheduleContext {
|
||||
public:
|
||||
ScheduleContext(std::shared_ptr<absl::flat_hash_map<ClientID, int64_t>> node_to_bundles,
|
||||
ScheduleContext(std::shared_ptr<absl::flat_hash_map<NodeID, int64_t>> node_to_bundles,
|
||||
const absl::optional<std::shared_ptr<BundleLocations>> bundle_locations,
|
||||
const GcsNodeManager &node_manager)
|
||||
: node_to_bundles_(std::move(node_to_bundles)),
|
||||
@@ -87,7 +88,7 @@ class ScheduleContext {
|
||||
node_manager_(node_manager) {}
|
||||
|
||||
// Key is node id, value is the number of bundles on the node.
|
||||
const std::shared_ptr<absl::flat_hash_map<ClientID, int64_t>> node_to_bundles_;
|
||||
const std::shared_ptr<absl::flat_hash_map<NodeID, int64_t>> node_to_bundles_;
|
||||
// The locations of existing bundles for this placement group.
|
||||
const absl::optional<std::shared_ptr<BundleLocations>> bundle_locations_;
|
||||
|
||||
@@ -158,7 +159,7 @@ class LeaseStatusTracker {
|
||||
/// \param node_id Id of a node where prepare request is sent.
|
||||
/// \param bundle Bundle specification the node is supposed to prepare.
|
||||
/// \return False if the prepare phase was already started. True otherwise.
|
||||
bool MarkPreparePhaseStarted(const ClientID &node_id,
|
||||
bool MarkPreparePhaseStarted(const NodeID &node_id,
|
||||
std::shared_ptr<BundleSpecification> bundle);
|
||||
|
||||
/// Indicate the tracker that all prepare requests are returned.
|
||||
@@ -167,7 +168,7 @@ class LeaseStatusTracker {
|
||||
/// \param bundle Bundle specification the node was supposed to schedule.
|
||||
/// \param status Status of the prepare response.
|
||||
/// \return void
|
||||
void MarkPrepareRequestReturned(const ClientID &node_id,
|
||||
void MarkPrepareRequestReturned(const NodeID &node_id,
|
||||
std::shared_ptr<BundleSpecification> bundle,
|
||||
const Status &status);
|
||||
|
||||
@@ -186,7 +187,7 @@ class LeaseStatusTracker {
|
||||
/// \param node_id Id of a node where commit request is returned.
|
||||
/// \param bundle Bundle specification the node was supposed to schedule.
|
||||
/// \param status Status of the returned commit request.
|
||||
void MarkCommitRequestReturned(const ClientID &node_id,
|
||||
void MarkCommitRequestReturned(const NodeID &node_id,
|
||||
const std::shared_ptr<BundleSpecification> bundle,
|
||||
const Status &status);
|
||||
|
||||
@@ -245,7 +246,7 @@ class LeaseStatusTracker {
|
||||
|
||||
/// Locations of the bundles for which prepare requests were sent.
|
||||
/// If prepare succeeds, the decision will be set as schedule_map[bundles[pos]]
|
||||
/// else will be set ClientID::Nil().
|
||||
/// else will be set NodeID::Nil().
|
||||
std::shared_ptr<BundleLocations> preparing_bundle_locations_;
|
||||
|
||||
/// Number of prepare requests that are returned.
|
||||
@@ -264,7 +265,7 @@ class LeaseStatusTracker {
|
||||
/// from that node. This is needed so that we can retry lease requests from the node
|
||||
/// until we receive a reply or the node is removed.
|
||||
/// TODO(sang): We don't currently handle retry.
|
||||
absl::flat_hash_map<ClientID, absl::flat_hash_set<BundleID>>
|
||||
absl::flat_hash_map<NodeID, absl::flat_hash_set<BundleID>>
|
||||
node_to_bundles_when_preparing_;
|
||||
|
||||
/// Bundles to schedule.
|
||||
@@ -289,7 +290,7 @@ class BundleLocationIndex {
|
||||
///
|
||||
/// \param node_id The id of node.
|
||||
/// \return True if succeed. False otherwise.
|
||||
bool Erase(const ClientID &node_id);
|
||||
bool Erase(const NodeID &node_id);
|
||||
|
||||
/// Erase bundle locations associated with a given placement group id.
|
||||
///
|
||||
@@ -309,19 +310,19 @@ class BundleLocationIndex {
|
||||
/// \param node_id Node id of this bundle locations.
|
||||
/// \return Bundle locations that are associated with a given node id.
|
||||
const absl::optional<std::shared_ptr<BundleLocations> const> GetBundleLocationsOnNode(
|
||||
const ClientID &node_id);
|
||||
const NodeID &node_id);
|
||||
|
||||
/// Update the index to contain new node information. Should be used only when new node
|
||||
/// is added to the cluster.
|
||||
///
|
||||
/// \param nodes map of alive nodes.
|
||||
void AddNodes(
|
||||
const absl::flat_hash_map<ClientID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes);
|
||||
const absl::flat_hash_map<NodeID, std::shared_ptr<rpc::GcsNodeInfo>> &nodes);
|
||||
|
||||
private:
|
||||
/// Map from node ID to the set of bundles. This is used to lookup bundles at each node
|
||||
/// when a node is dead.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<BundleLocations>> node_to_leased_bundles_;
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<BundleLocations>> node_to_leased_bundles_;
|
||||
|
||||
/// A map from placement group id to bundle locations.
|
||||
/// It is used to destroy bundles for the placement group.
|
||||
@@ -382,7 +383,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface {
|
||||
/// \param node_id ID of the dead node.
|
||||
/// \return The bundles belonging to the dead node.
|
||||
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
|
||||
const ClientID &node_id) override;
|
||||
const NodeID &node_id) override;
|
||||
|
||||
protected:
|
||||
/// Send a bundle PREPARE request to a node. The PREPARE request will lock resources
|
||||
@@ -459,7 +460,7 @@ class GcsPlacementGroupScheduler : public GcsPlacementGroupSchedulerInterface {
|
||||
const GcsNodeManager &gcs_node_manager_;
|
||||
|
||||
/// The cached node clients which are used to communicate with raylet to lease workers.
|
||||
absl::flat_hash_map<ClientID, std::shared_ptr<ResourceReserveInterface>>
|
||||
absl::flat_hash_map<NodeID, std::shared_ptr<ResourceReserveInterface>>
|
||||
remote_lease_clients_;
|
||||
|
||||
/// Factory for producing new clients to request leases from remote nodes.
|
||||
|
||||
@@ -206,8 +206,8 @@ void GcsServer::InitGcsActorManager() {
|
||||
[this](std::shared_ptr<rpc::GcsNodeInfo> node) {
|
||||
// All of the related placement groups and actors should be reconstructed when a
|
||||
// node is removed from the GCS.
|
||||
gcs_placement_group_manager_->OnNodeDead(ClientID::FromBinary(node->node_id()));
|
||||
gcs_actor_manager_->OnNodeDead(ClientID::FromBinary(node->node_id()));
|
||||
gcs_placement_group_manager_->OnNodeDead(NodeID::FromBinary(node->node_id()));
|
||||
gcs_actor_manager_->OnNodeDead(NodeID::FromBinary(node->node_id()));
|
||||
});
|
||||
|
||||
auto on_subscribe = [this](const std::string &id, const std::string &data) {
|
||||
@@ -215,7 +215,7 @@ void GcsServer::InitGcsActorManager() {
|
||||
worker_failure_data.ParseFromString(data);
|
||||
auto &worker_address = worker_failure_data.worker_address();
|
||||
WorkerID worker_id = WorkerID::FromBinary(id);
|
||||
ClientID node_id = ClientID::FromBinary(worker_address.raylet_id());
|
||||
NodeID node_id = NodeID::FromBinary(worker_address.raylet_id());
|
||||
gcs_actor_manager_->OnWorkerDead(node_id, worker_id,
|
||||
worker_failure_data.intentional_disconnect());
|
||||
};
|
||||
|
||||
@@ -127,10 +127,10 @@ Status GcsTableWithJobId<Key, Data>::BatchDelete(const std::vector<Key> &keys,
|
||||
}
|
||||
|
||||
template class GcsTable<JobID, JobTableData>;
|
||||
template class GcsTable<ClientID, GcsNodeInfo>;
|
||||
template class GcsTable<ClientID, ResourceMap>;
|
||||
template class GcsTable<ClientID, HeartbeatTableData>;
|
||||
template class GcsTable<ClientID, HeartbeatBatchTableData>;
|
||||
template class GcsTable<NodeID, GcsNodeInfo>;
|
||||
template class GcsTable<NodeID, ResourceMap>;
|
||||
template class GcsTable<NodeID, HeartbeatTableData>;
|
||||
template class GcsTable<NodeID, HeartbeatBatchTableData>;
|
||||
template class GcsTable<JobID, ErrorTableData>;
|
||||
template class GcsTable<UniqueID, ProfileTableData>;
|
||||
template class GcsTable<WorkerID, WorkerTableData>;
|
||||
|
||||
@@ -245,7 +245,7 @@ class GcsObjectTable : public GcsTableWithJobId<ObjectID, ObjectTableDataList> {
|
||||
JobID GetJobIdFromKey(const ObjectID &key) override { return key.TaskId().JobId(); }
|
||||
};
|
||||
|
||||
class GcsNodeTable : public GcsTable<ClientID, GcsNodeInfo> {
|
||||
class GcsNodeTable : public GcsTable<NodeID, GcsNodeInfo> {
|
||||
public:
|
||||
explicit GcsNodeTable(std::shared_ptr<StoreClient> &store_client)
|
||||
: GcsTable(store_client) {
|
||||
@@ -253,7 +253,7 @@ class GcsNodeTable : public GcsTable<ClientID, GcsNodeInfo> {
|
||||
}
|
||||
};
|
||||
|
||||
class GcsNodeResourceTable : public GcsTable<ClientID, ResourceMap> {
|
||||
class GcsNodeResourceTable : public GcsTable<NodeID, ResourceMap> {
|
||||
public:
|
||||
explicit GcsNodeResourceTable(std::shared_ptr<StoreClient> &store_client)
|
||||
: GcsTable(store_client) {
|
||||
@@ -261,7 +261,7 @@ class GcsNodeResourceTable : public GcsTable<ClientID, ResourceMap> {
|
||||
}
|
||||
};
|
||||
|
||||
class GcsHeartbeatTable : public GcsTable<ClientID, HeartbeatTableData> {
|
||||
class GcsHeartbeatTable : public GcsTable<NodeID, HeartbeatTableData> {
|
||||
public:
|
||||
explicit GcsHeartbeatTable(std::shared_ptr<StoreClient> &store_client)
|
||||
: GcsTable(store_client) {
|
||||
@@ -277,7 +277,7 @@ class GcsPlacementGroupScheduleTable : public GcsTable<PlacementGroupID, Schedul
|
||||
}
|
||||
};
|
||||
|
||||
class GcsHeartbeatBatchTable : public GcsTable<ClientID, HeartbeatBatchTableData> {
|
||||
class GcsHeartbeatBatchTable : public GcsTable<NodeID, HeartbeatBatchTableData> {
|
||||
public:
|
||||
explicit GcsHeartbeatBatchTable(std::shared_ptr<StoreClient> &store_client)
|
||||
: GcsTable(store_client) {
|
||||
|
||||
@@ -22,7 +22,7 @@ void GcsWorkerManager::HandleReportWorkerFailure(
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
const rpc::Address worker_address = request.worker_failure().worker_address();
|
||||
const auto worker_id = WorkerID::FromBinary(worker_address.worker_id());
|
||||
const auto node_id = ClientID::FromBinary(worker_address.raylet_id());
|
||||
const auto node_id = NodeID::FromBinary(worker_address.raylet_id());
|
||||
std::stringstream log_stream;
|
||||
log_stream << "Reporting worker failure, worker id = " << worker_id
|
||||
<< ", node id = " << node_id
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/gcs/gcs_server/stats_handler_impl.h"
|
||||
|
||||
#include "ray/common/ray_config.h"
|
||||
|
||||
namespace ray {
|
||||
@@ -21,7 +22,7 @@ namespace rpc {
|
||||
void DefaultStatsHandler::HandleAddProfileData(const AddProfileDataRequest &request,
|
||||
AddProfileDataReply *reply,
|
||||
SendReplyCallback send_reply_callback) {
|
||||
ClientID node_id = ClientID::FromBinary(request.profile_data().component_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.profile_data().component_id());
|
||||
RAY_LOG(DEBUG) << "Adding profile data, component type = "
|
||||
<< request.profile_data().component_type() << ", node id = " << node_id;
|
||||
auto profile_table_data = std::make_shared<ProfileTableData>();
|
||||
|
||||
@@ -93,7 +93,7 @@ void DefaultTaskInfoHandler::HandleAddTaskLease(const AddTaskLeaseRequest &reque
|
||||
AddTaskLeaseReply *reply,
|
||||
SendReplyCallback send_reply_callback) {
|
||||
TaskID task_id = TaskID::FromBinary(request.task_lease_data().task_id());
|
||||
ClientID node_id = ClientID::FromBinary(request.task_lease_data().node_manager_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.task_lease_data().node_manager_id());
|
||||
RAY_LOG(DEBUG) << "Adding task lease, job id = " << task_id.JobId()
|
||||
<< ", task id = " << task_id << ", node id = " << node_id;
|
||||
auto on_done = [this, task_id, node_id, request, reply,
|
||||
@@ -144,8 +144,7 @@ void DefaultTaskInfoHandler::HandleAttemptTaskReconstruction(
|
||||
const AttemptTaskReconstructionRequest &request,
|
||||
AttemptTaskReconstructionReply *reply, SendReplyCallback send_reply_callback) {
|
||||
TaskID task_id = TaskID::FromBinary(request.task_reconstruction().task_id());
|
||||
ClientID node_id =
|
||||
ClientID::FromBinary(request.task_reconstruction().node_manager_id());
|
||||
NodeID node_id = NodeID::FromBinary(request.task_reconstruction().node_manager_id());
|
||||
RAY_LOG(DEBUG) << "Reconstructing task, job id = " << task_id.JobId()
|
||||
<< ", task id = " << task_id << ", reconstructions num = "
|
||||
<< request.task_reconstruction().num_reconstructions()
|
||||
|
||||
@@ -31,12 +31,11 @@ class MockActorScheduler : public gcs::GcsActorSchedulerInterface {
|
||||
void Schedule(std::shared_ptr<gcs::GcsActor> actor) { actors.push_back(actor); }
|
||||
void Reschedule(std::shared_ptr<gcs::GcsActor> actor) {}
|
||||
void ReleaseUnusedWorkers(
|
||||
const std::unordered_map<ClientID, std::vector<WorkerID>> &node_to_workers) {}
|
||||
const std::unordered_map<NodeID, std::vector<WorkerID>> &node_to_workers) {}
|
||||
|
||||
MOCK_METHOD1(CancelOnNode, std::vector<ActorID>(const ClientID &node_id));
|
||||
MOCK_METHOD2(CancelOnWorker,
|
||||
ActorID(const ClientID &node_id, const WorkerID &worker_id));
|
||||
MOCK_METHOD2(CancelOnLeasing, void(const ClientID &node_id, const ActorID &actor_id));
|
||||
MOCK_METHOD1(CancelOnNode, std::vector<ActorID>(const NodeID &node_id));
|
||||
MOCK_METHOD2(CancelOnWorker, ActorID(const NodeID &node_id, const WorkerID &worker_id));
|
||||
MOCK_METHOD2(CancelOnLeasing, void(const NodeID &node_id, const ActorID &actor_id));
|
||||
|
||||
std::vector<std::shared_ptr<gcs::GcsActor>> actors;
|
||||
};
|
||||
@@ -121,7 +120,7 @@ class GcsActorManagerTest : public ::testing::Test {
|
||||
|
||||
rpc::Address RandomAddress() const {
|
||||
rpc::Address address;
|
||||
auto node_id = ClientID::FromRandom();
|
||||
auto node_id = NodeID::FromRandom();
|
||||
auto worker_id = WorkerID::FromRandom();
|
||||
address.set_raylet_id(node_id.Binary());
|
||||
address.set_worker_id(worker_id.Binary());
|
||||
@@ -238,7 +237,7 @@ TEST_F(GcsActorManagerTest, TestWorkerFailure) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(address.worker_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
@@ -281,7 +280,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
WaitActorCreated(actor->GetActorID());
|
||||
@@ -289,7 +288,7 @@ TEST_F(GcsActorManagerTest, TestNodeFailure) {
|
||||
|
||||
// Killing another node does not affect this actor.
|
||||
EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_));
|
||||
gcs_actor_manager_->OnNodeDead(ClientID::FromRandom());
|
||||
gcs_actor_manager_->OnNodeDead(NodeID::FromRandom());
|
||||
ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE);
|
||||
|
||||
// Remove node and then check that the actor is dead.
|
||||
@@ -325,7 +324,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
WaitActorCreated(actor->GetActorID());
|
||||
@@ -341,7 +340,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
|
||||
ASSERT_EQ(mock_actor_scheduler_->actors.size(), 1);
|
||||
mock_actor_scheduler_->actors.clear();
|
||||
ASSERT_EQ(finished_actors.size(), 1);
|
||||
auto node_id2 = ClientID::FromRandom();
|
||||
auto node_id2 = NodeID::FromRandom();
|
||||
address.set_raylet_id(node_id2.Binary());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
@@ -352,7 +351,7 @@ TEST_F(GcsActorManagerTest, TestActorReconstruction) {
|
||||
|
||||
// Killing another worker does not affect this actor.
|
||||
EXPECT_CALL(*mock_actor_scheduler_, CancelOnNode(_));
|
||||
gcs_actor_manager_->OnNodeDead(ClientID::FromRandom());
|
||||
gcs_actor_manager_->OnNodeDead(NodeID::FromRandom());
|
||||
ASSERT_EQ(actor->GetState(), rpc::ActorTableData::ALIVE);
|
||||
|
||||
// Remove worker and then check that the actor is dead.
|
||||
@@ -388,7 +387,7 @@ TEST_F(GcsActorManagerTest, TestActorRestartWhenOwnerDead) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
WaitActorCreated(actor->GetActorID());
|
||||
@@ -532,7 +531,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionWorkerFailure) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(address.worker_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
@@ -578,7 +577,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNodeFailure) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
WaitActorCreated(actor->GetActorID());
|
||||
@@ -624,7 +623,7 @@ TEST_F(GcsActorManagerTest, TestNamedActorDeletionNotHappendWhenReconstructed) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
auto address = RandomAddress();
|
||||
auto node_id = ClientID::FromBinary(address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(address.worker_id());
|
||||
actor->UpdateAddress(address);
|
||||
gcs_actor_manager_->OnActorCreationSuccess(actor);
|
||||
@@ -699,7 +698,7 @@ TEST_F(GcsActorManagerTest, TestRaceConditionCancelLease) {
|
||||
|
||||
// Check that the actor is in state `ALIVE`.
|
||||
rpc::Address address;
|
||||
auto node_id = ClientID::FromRandom();
|
||||
auto node_id = NodeID::FromRandom();
|
||||
auto worker_id = WorkerID::FromRandom();
|
||||
address.set_raylet_id(node_id.Binary());
|
||||
address.set_worker_id(worker_id.Binary());
|
||||
@@ -742,7 +741,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeActorDependenciesResolved) {
|
||||
auto job_id = JobID::FromInt(1);
|
||||
auto registered_actor = RegisterActor(job_id);
|
||||
const auto &owner_address = registered_actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
|
||||
gcs_actor_manager_->OnWorkerDead(node_id, worker_id);
|
||||
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
|
||||
@@ -758,7 +757,7 @@ TEST_F(GcsActorManagerTest, TestOwnerWorkerDieBeforeDetachedActorDependenciesRes
|
||||
auto job_id = JobID::FromInt(1);
|
||||
auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true);
|
||||
const auto &owner_address = registered_actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
auto worker_id = WorkerID::FromBinary(owner_address.worker_id());
|
||||
gcs_actor_manager_->OnWorkerDead(node_id, worker_id);
|
||||
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
|
||||
@@ -774,7 +773,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeActorDependenciesResolved) {
|
||||
auto job_id = JobID::FromInt(1);
|
||||
auto registered_actor = RegisterActor(job_id);
|
||||
const auto &owner_address = registered_actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
gcs_actor_manager_->OnNodeDead(node_id);
|
||||
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
|
||||
|
||||
@@ -789,7 +788,7 @@ TEST_F(GcsActorManagerTest, TestOwnerNodeDieBeforeDetachedActorDependenciesResol
|
||||
auto job_id = JobID::FromInt(1);
|
||||
auto registered_actor = RegisterActor(job_id, /*max_restarts=*/1, /*detached=*/true);
|
||||
const auto &owner_address = registered_actor->GetOwnerAddress();
|
||||
auto node_id = ClientID::FromBinary(owner_address.raylet_id());
|
||||
auto node_id = NodeID::FromBinary(owner_address.raylet_id());
|
||||
gcs_actor_manager_->OnNodeDead(node_id);
|
||||
ASSERT_EQ(registered_actor->GetState(), rpc::ActorTableData::DEAD);
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleFailedWithZeroNode) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -103,7 +103,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
|
||||
WorkerID worker_id = WorkerID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
|
||||
node->node_manager_port(), worker_id,
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -119,7 +119,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleActorSuccess) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -138,7 +138,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
|
||||
// Mock a IOError reply, then the lease request will retry again.
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
|
||||
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
|
||||
node_id, ClientID::Nil(), Status::IOError("")));
|
||||
node_id, NodeID::Nil(), Status::IOError("")));
|
||||
ASSERT_EQ(1, gcs_actor_scheduler_->num_retry_leasing_count_);
|
||||
ASSERT_EQ(2, raylet_client_->num_workers_requested);
|
||||
ASSERT_EQ(1, raylet_client_->callbacks.size());
|
||||
@@ -148,7 +148,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
|
||||
WorkerID worker_id = WorkerID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
|
||||
node->node_manager_port(), worker_id,
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -164,7 +164,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenLeasing) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -183,7 +183,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
|
||||
WorkerID worker_id = WorkerID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
|
||||
node->node_manager_port(), worker_id,
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_creating_count_);
|
||||
@@ -205,7 +205,7 @@ TEST_F(GcsActorSchedulerTest, TestScheduleRetryWhenCreating) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -232,7 +232,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
|
||||
// Grant a worker, which will influence nothing.
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
|
||||
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(1, raylet_client_->num_workers_requested);
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_);
|
||||
@@ -243,7 +243,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenLeasing) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -265,7 +265,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
|
||||
// Grant a worker, which will influence nothing.
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
|
||||
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(1, raylet_client_->num_workers_requested);
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(0, gcs_actor_scheduler_->num_retry_leasing_count_);
|
||||
@@ -276,7 +276,7 @@ TEST_F(GcsActorSchedulerTest, TestLeasingCancelledWhenLeasing) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -294,7 +294,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
|
||||
// Grant a worker, then the actor creation request should be send to the worker.
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
|
||||
node->node_manager_address(), node->node_manager_port(), WorkerID::FromRandom(),
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -318,7 +318,7 @@ TEST_F(GcsActorSchedulerTest, TestNodeFailedWhenCreating) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -337,7 +337,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
|
||||
auto worker_id = WorkerID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node->node_manager_address(),
|
||||
node->node_manager_port(), worker_id,
|
||||
node_id, ClientID::Nil()));
|
||||
node_id, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -357,7 +357,7 @@ TEST_F(GcsActorSchedulerTest, TestWorkerFailedWhenCreating) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestSpillback) {
|
||||
auto node1 = Mocker::GenNodeInfo();
|
||||
auto node_id_1 = ClientID::FromBinary(node1->node_id());
|
||||
auto node_id_1 = NodeID::FromBinary(node1->node_id());
|
||||
gcs_node_manager_->AddNode(node1);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -374,12 +374,12 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
|
||||
|
||||
// Add another node.
|
||||
auto node2 = Mocker::GenNodeInfo();
|
||||
auto node_id_2 = ClientID::FromBinary(node2->node_id());
|
||||
auto node_id_2 = NodeID::FromBinary(node2->node_id());
|
||||
gcs_node_manager_->AddNode(node2);
|
||||
ASSERT_EQ(2, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
// Grant with an invalid spillback node, and schedule again.
|
||||
auto invalid_node_id = ClientID::FromBinary(Mocker::GenNodeInfo()->node_id());
|
||||
auto invalid_node_id = NodeID::FromBinary(Mocker::GenNodeInfo()->node_id());
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(
|
||||
node2->node_manager_address(), node2->node_manager_port(), WorkerID::Nil(),
|
||||
node_id_1, invalid_node_id));
|
||||
@@ -400,7 +400,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
|
||||
WorkerID worker_id = WorkerID::FromRandom();
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node2->node_manager_address(),
|
||||
node2->node_manager_port(), worker_id,
|
||||
node_id_2, ClientID::Nil()));
|
||||
node_id_2, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -417,7 +417,7 @@ TEST_F(GcsActorSchedulerTest, TestSpillback) {
|
||||
|
||||
TEST_F(GcsActorSchedulerTest, TestReschedule) {
|
||||
auto node1 = Mocker::GenNodeInfo();
|
||||
auto node_id_1 = ClientID::FromBinary(node1->node_id());
|
||||
auto node_id_1 = NodeID::FromBinary(node1->node_id());
|
||||
gcs_node_manager_->AddNode(node1);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
@@ -452,7 +452,7 @@ TEST_F(GcsActorSchedulerTest, TestReschedule) {
|
||||
// Grant a worker, then the actor creation request should be send to the worker.
|
||||
ASSERT_TRUE(raylet_client_->GrantWorkerLease(node1->node_manager_address(),
|
||||
node1->node_manager_port(), worker_id,
|
||||
node_id_1, ClientID::Nil()));
|
||||
node_id_1, NodeID::Nil()));
|
||||
ASSERT_EQ(0, raylet_client_->callbacks.size());
|
||||
ASSERT_EQ(1, worker_client_->callbacks.size());
|
||||
|
||||
@@ -470,12 +470,12 @@ TEST_F(GcsActorSchedulerTest, TestReleaseUnusedWorkers) {
|
||||
|
||||
// Add a node to the cluster.
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
gcs_node_manager_->AddNode(node);
|
||||
ASSERT_EQ(1, gcs_node_manager_->GetAllAliveNodes().size());
|
||||
|
||||
// Send a `ReleaseUnusedWorkers` request to the node.
|
||||
std::unordered_map<ClientID, std::vector<WorkerID>> node_to_workers;
|
||||
std::unordered_map<NodeID, std::vector<WorkerID>> node_to_workers;
|
||||
node_to_workers[node_id].push_back({WorkerID::FromRandom()});
|
||||
gcs_actor_scheduler_->ReleaseUnusedWorkers(node_to_workers);
|
||||
ASSERT_EQ(1, raylet_client_->num_release_unused_workers);
|
||||
|
||||
@@ -37,7 +37,7 @@ TEST_F(GcsNodeManagerTest, TestManagement) {
|
||||
gcs_table_storage_);
|
||||
// Test Add/Get/Remove functionality.
|
||||
auto node = Mocker::GenNodeInfo();
|
||||
auto node_id = ClientID::FromBinary(node->node_id());
|
||||
auto node_id = NodeID::FromBinary(node->node_id());
|
||||
|
||||
node_manager.AddNode(node);
|
||||
ASSERT_EQ(node, node_manager.GetNode(node_id));
|
||||
@@ -67,7 +67,7 @@ TEST_F(GcsNodeManagerTest, TestListener) {
|
||||
auto &alive_nodes = node_manager.GetAllAliveNodes();
|
||||
ASSERT_EQ(added_nodes.size(), alive_nodes.size());
|
||||
for (const auto &node : added_nodes) {
|
||||
ASSERT_EQ(1, alive_nodes.count(ClientID::FromBinary(node->node_id())));
|
||||
ASSERT_EQ(1, alive_nodes.count(NodeID::FromBinary(node->node_id())));
|
||||
}
|
||||
|
||||
// Test AddNodeRemovedListener.
|
||||
@@ -77,7 +77,7 @@ TEST_F(GcsNodeManagerTest, TestListener) {
|
||||
removed_nodes.emplace_back(std::move(node));
|
||||
});
|
||||
for (int i = 0; i < node_count; ++i) {
|
||||
node_manager.RemoveNode(ClientID::FromBinary(added_nodes[i]->node_id()));
|
||||
node_manager.RemoveNode(NodeID::FromBinary(added_nodes[i]->node_id()));
|
||||
}
|
||||
ASSERT_EQ(node_count, removed_nodes.size());
|
||||
ASSERT_TRUE(node_manager.GetAllAliveNodes().empty());
|
||||
@@ -91,7 +91,7 @@ TEST_F(GcsNodeManagerTest, TestGetClusterRealtimeResources) {
|
||||
gcs::GcsNodeManager node_manager(io_service, io_service, gcs_pub_sub_,
|
||||
gcs_table_storage_);
|
||||
|
||||
auto node_id = ClientID::FromRandom();
|
||||
auto node_id = NodeID::FromRandom();
|
||||
rpc::HeartbeatTableData heartbeat;
|
||||
const std::string cpu_resource = "CPU";
|
||||
(*heartbeat.mutable_resources_available())[cpu_resource] = 10;
|
||||
|
||||
@@ -28,24 +28,24 @@ class MockedGcsObjectManager : public gcs::GcsObjectManager {
|
||||
: gcs::GcsObjectManager(gcs_table_storage, gcs_pub_sub, gcs_node_manager) {}
|
||||
|
||||
public:
|
||||
void AddObjectsLocation(const ClientID &node_id,
|
||||
void AddObjectsLocation(const NodeID &node_id,
|
||||
const absl::flat_hash_set<ObjectID> &object_ids) {
|
||||
gcs::GcsObjectManager::AddObjectsLocation(node_id, object_ids);
|
||||
}
|
||||
|
||||
void AddObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) {
|
||||
void AddObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) {
|
||||
gcs::GcsObjectManager::AddObjectLocationInCache(object_id, node_id);
|
||||
}
|
||||
|
||||
absl::flat_hash_set<ClientID> GetObjectLocations(const ObjectID &object_id) {
|
||||
absl::flat_hash_set<NodeID> GetObjectLocations(const ObjectID &object_id) {
|
||||
return gcs::GcsObjectManager::GetObjectLocations(object_id);
|
||||
}
|
||||
|
||||
void OnNodeRemoved(const ClientID &node_id) {
|
||||
void OnNodeRemoved(const NodeID &node_id) {
|
||||
gcs::GcsObjectManager::OnNodeRemoved(node_id);
|
||||
}
|
||||
|
||||
void RemoveObjectLocationInCache(const ObjectID &object_id, const ClientID &node_id) {
|
||||
void RemoveObjectLocationInCache(const ObjectID &object_id, const NodeID &node_id) {
|
||||
gcs::GcsObjectManager::RemoveObjectLocationInCache(object_id, node_id);
|
||||
}
|
||||
};
|
||||
@@ -67,12 +67,12 @@ class GcsObjectManagerTest : public ::testing::Test {
|
||||
object_ids_.emplace(object_id);
|
||||
}
|
||||
for (size_t i = 0; i < node_count_; ++i) {
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
node_ids_.emplace(node_id);
|
||||
}
|
||||
}
|
||||
|
||||
void CheckLocations(const absl::flat_hash_set<ClientID> &locations) {
|
||||
void CheckLocations(const absl::flat_hash_set<NodeID> &locations) {
|
||||
ASSERT_EQ(locations.size(), node_ids_.size());
|
||||
for (const auto &location : locations) {
|
||||
auto it = node_ids_.find(location);
|
||||
@@ -92,7 +92,7 @@ class GcsObjectManagerTest : public ::testing::Test {
|
||||
size_t object_count_{5};
|
||||
size_t node_count_{10};
|
||||
absl::flat_hash_set<ObjectID> object_ids_;
|
||||
absl::flat_hash_set<ClientID> node_ids_;
|
||||
absl::flat_hash_set<NodeID> node_ids_;
|
||||
};
|
||||
|
||||
TEST_F(GcsObjectManagerTest, AddObjectsLocationAndGetLocationTest) {
|
||||
|
||||
@@ -40,7 +40,7 @@ class MockPlacementGroupScheduler : public gcs::GcsPlacementGroupSchedulerInterf
|
||||
MOCK_METHOD1(MarkScheduleCancelled, void(const PlacementGroupID &placement_group_id));
|
||||
|
||||
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> GetBundlesOnNode(
|
||||
const ClientID &node_id) override {
|
||||
const NodeID &node_id) override {
|
||||
absl::flat_hash_map<PlacementGroupID, std::vector<int64_t>> bundles;
|
||||
bundles[group_on_dead_node_] = bundles_on_dead_node_;
|
||||
return bundles;
|
||||
@@ -306,8 +306,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
|
||||
ASSERT_EQ(finished_placement_group_count, 0);
|
||||
ASSERT_EQ(mock_placement_group_scheduler_->placement_groups_.size(), 1);
|
||||
auto placement_group = mock_placement_group_scheduler_->placement_groups_.back();
|
||||
placement_group->GetMutableBundle(0)->set_node_id(ClientID::FromRandom().Binary());
|
||||
placement_group->GetMutableBundle(1)->set_node_id(ClientID::FromRandom().Binary());
|
||||
placement_group->GetMutableBundle(0)->set_node_id(NodeID::FromRandom().Binary());
|
||||
placement_group->GetMutableBundle(1)->set_node_id(NodeID::FromRandom().Binary());
|
||||
mock_placement_group_scheduler_->placement_groups_.pop_back();
|
||||
|
||||
// If a node dies, we will set the bundles above it to be unplaced and reschedule the
|
||||
@@ -316,7 +316,7 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
|
||||
mock_placement_group_scheduler_->group_on_dead_node_ =
|
||||
placement_group->GetPlacementGroupID();
|
||||
mock_placement_group_scheduler_->bundles_on_dead_node_.push_back(0);
|
||||
gcs_placement_group_manager_->OnNodeDead(ClientID::FromRandom());
|
||||
gcs_placement_group_manager_->OnNodeDead(NodeID::FromRandom());
|
||||
|
||||
// Trigger scheduling `RESCHEDULING` placement group.
|
||||
auto finished_group = std::make_shared<gcs::GcsPlacementGroup>(
|
||||
@@ -328,8 +328,8 @@ TEST_F(GcsPlacementGroupManagerTest, TestRescheduleWhenNodeDead) {
|
||||
placement_group->GetPlacementGroupID());
|
||||
const auto &bundles =
|
||||
mock_placement_group_scheduler_->placement_groups_[0]->GetBundles();
|
||||
EXPECT_TRUE(ClientID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil());
|
||||
EXPECT_FALSE(ClientID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil());
|
||||
EXPECT_TRUE(NodeID::FromBinary(bundles[0]->GetMutableMessage().node_id()).IsNil());
|
||||
EXPECT_FALSE(NodeID::FromBinary(bundles[1]->GetMutableMessage().node_id()).IsNil());
|
||||
|
||||
// If `RESCHEDULING` placement group fails to create, we will schedule it again first.
|
||||
placement_group = mock_placement_group_scheduler_->placement_groups_.back();
|
||||
|
||||
@@ -63,7 +63,7 @@ class GcsPlacementGroupSchedulerTest : public ::testing::Test {
|
||||
gcs_node_manager_->AddNode(node);
|
||||
rpc::HeartbeatTableData heartbeat;
|
||||
(*heartbeat.mutable_resources_available())["CPU"] = cpu_num;
|
||||
gcs_node_manager_->UpdateNodeRealtimeResources(ClientID::FromBinary(node->node_id()),
|
||||
gcs_node_manager_->UpdateNodeRealtimeResources(NodeID::FromBinary(node->node_id()),
|
||||
heartbeat);
|
||||
}
|
||||
|
||||
@@ -489,10 +489,10 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestRescheduleWhenNodeDead) {
|
||||
WaitPendingDone(success_placement_groups_, 1);
|
||||
|
||||
auto bundles_on_node0 =
|
||||
scheduler_->GetBundlesOnNode(ClientID::FromBinary(node0->node_id()));
|
||||
scheduler_->GetBundlesOnNode(NodeID::FromBinary(node0->node_id()));
|
||||
ASSERT_EQ(1, bundles_on_node0.size());
|
||||
auto bundles_on_node1 =
|
||||
scheduler_->GetBundlesOnNode(ClientID::FromBinary(node1->node_id()));
|
||||
scheduler_->GetBundlesOnNode(NodeID::FromBinary(node1->node_id()));
|
||||
ASSERT_EQ(1, bundles_on_node1.size());
|
||||
// One node is dead, reschedule the placement group.
|
||||
auto bundle_on_dead_node = placement_group->GetMutableBundle(0);
|
||||
@@ -543,8 +543,8 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestStrictSpreadStrategyResourceCheck) {
|
||||
TEST_F(GcsPlacementGroupSchedulerTest, TestBundleLocationIndex) {
|
||||
gcs::BundleLocationIndex bundle_location_index;
|
||||
/// Generate data.
|
||||
const auto node1 = ClientID::FromRandom();
|
||||
const auto node2 = ClientID::FromRandom();
|
||||
const auto node1 = NodeID::FromRandom();
|
||||
const auto node2 = NodeID::FromRandom();
|
||||
rpc::CreatePlacementGroupRequest request_pg1 =
|
||||
Mocker::GenCreatePlacementGroupRequest("pg1");
|
||||
const auto pg1_id = PlacementGroupID::FromBinary(
|
||||
@@ -641,7 +641,7 @@ TEST_F(GcsPlacementGroupSchedulerTest, TestNodeDeadDuringCommitResources) {
|
||||
|
||||
scheduler_->ScheduleUnplacedBundles(placement_group, failure_handler, success_handler);
|
||||
ASSERT_TRUE(raylet_clients_[0]->GrantPrepareBundleResources());
|
||||
gcs_node_manager_->RemoveNode(ClientID::FromBinary(node1->node_id()));
|
||||
gcs_node_manager_->RemoveNode(NodeID::FromBinary(node1->node_id()));
|
||||
ASSERT_TRUE(raylet_clients_[1]->GrantPrepareBundleResources());
|
||||
WaitPendingDone(failure_placement_groups_, 1);
|
||||
}
|
||||
|
||||
@@ -671,8 +671,8 @@ TEST_F(GcsServerTest, TestNodeInfo) {
|
||||
TEST_F(GcsServerTest, TestObjectInfo) {
|
||||
// Create object table data
|
||||
ObjectID object_id = ObjectID::FromRandom();
|
||||
ClientID node1_id = ClientID::FromRandom();
|
||||
ClientID node2_id = ClientID::FromRandom();
|
||||
NodeID node1_id = NodeID::FromRandom();
|
||||
NodeID node2_id = NodeID::FromRandom();
|
||||
|
||||
// Add object location
|
||||
rpc::AddObjectLocationRequest add_object_location_request;
|
||||
@@ -720,7 +720,7 @@ TEST_F(GcsServerTest, TestTaskInfo) {
|
||||
ASSERT_TRUE(!result.has_task());
|
||||
|
||||
// Add task lease
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
auto task_lease_data = Mocker::GenTaskLeaseData(task_id.Binary(), node_id.Binary());
|
||||
rpc::AddTaskLeaseRequest add_task_lease_request;
|
||||
add_task_lease_request.mutable_task_lease_data()->CopyFrom(*task_lease_data);
|
||||
@@ -739,7 +739,7 @@ TEST_F(GcsServerTest, TestTaskInfo) {
|
||||
|
||||
TEST_F(GcsServerTest, TestStats) {
|
||||
rpc::ProfileTableData profile_table_data;
|
||||
profile_table_data.set_component_id(ClientID::FromRandom().Binary());
|
||||
profile_table_data.set_component_id(NodeID::FromRandom().Binary());
|
||||
rpc::AddProfileDataRequest add_profile_data_request;
|
||||
add_profile_data_request.mutable_profile_data()->CopyFrom(profile_table_data);
|
||||
ASSERT_TRUE(AddProfileData(add_profile_data_request));
|
||||
|
||||
@@ -89,12 +89,12 @@ struct GcsServerMocker {
|
||||
}
|
||||
|
||||
bool GrantWorkerLease() {
|
||||
return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, ClientID::Nil());
|
||||
return GrantWorkerLease("", 0, WorkerID::FromRandom(), node_id, NodeID::Nil());
|
||||
}
|
||||
|
||||
// Trigger reply to RequestWorkerLease.
|
||||
bool GrantWorkerLease(const std::string &address, int port, const WorkerID &worker_id,
|
||||
const ClientID &raylet_id, const ClientID &retry_at_raylet_id,
|
||||
const NodeID &raylet_id, const NodeID &retry_at_raylet_id,
|
||||
Status status = Status::OK()) {
|
||||
rpc::RequestWorkerLeaseReply reply;
|
||||
if (!retry_at_raylet_id.IsNil()) {
|
||||
@@ -150,7 +150,7 @@ struct GcsServerMocker {
|
||||
int num_workers_disconnected = 0;
|
||||
int num_leases_canceled = 0;
|
||||
int num_release_unused_workers = 0;
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
std::list<rpc::ClientCallback<rpc::RequestWorkerLeaseReply>> callbacks = {};
|
||||
std::list<rpc::ClientCallback<rpc::CancelWorkerLeaseReply>> cancel_callbacks = {};
|
||||
std::list<rpc::ClientCallback<rpc::ReleaseUnusedWorkersReply>> release_callbacks = {};
|
||||
@@ -217,7 +217,7 @@ struct GcsServerMocker {
|
||||
int num_lease_requested = 0;
|
||||
int num_return_requested = 0;
|
||||
int num_commit_requested = 0;
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
std::list<rpc::ClientCallback<rpc::PrepareBundleResourcesReply>> lease_callbacks = {};
|
||||
std::list<rpc::ClientCallback<rpc::CancelResourceReserveReply>> return_callbacks = {};
|
||||
};
|
||||
@@ -289,8 +289,8 @@ struct GcsServerMocker {
|
||||
|
||||
Status UnregisterSelf() override { return Status::NotImplemented(""); }
|
||||
|
||||
const ClientID &GetSelfId() const override {
|
||||
static ClientID node_id;
|
||||
const NodeID &GetSelfId() const override {
|
||||
static NodeID node_id;
|
||||
return node_id;
|
||||
}
|
||||
|
||||
@@ -304,7 +304,7 @@ struct GcsServerMocker {
|
||||
return Status::NotImplemented("");
|
||||
}
|
||||
|
||||
Status AsyncUnregister(const ClientID &node_id,
|
||||
Status AsyncUnregister(const NodeID &node_id,
|
||||
const gcs::StatusCallback &callback) override {
|
||||
if (callback) {
|
||||
callback(Status::OK());
|
||||
@@ -321,34 +321,34 @@ struct GcsServerMocker {
|
||||
}
|
||||
|
||||
Status AsyncSubscribeToNodeChange(
|
||||
const gcs::SubscribeCallback<ClientID, rpc::GcsNodeInfo> &subscribe,
|
||||
const gcs::SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
|
||||
const gcs::StatusCallback &done) override {
|
||||
return Status::NotImplemented("");
|
||||
}
|
||||
|
||||
boost::optional<rpc::GcsNodeInfo> Get(const ClientID &node_id) const override {
|
||||
boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id) const override {
|
||||
return boost::none;
|
||||
}
|
||||
|
||||
const std::unordered_map<ClientID, rpc::GcsNodeInfo> &GetAll() const override {
|
||||
static std::unordered_map<ClientID, rpc::GcsNodeInfo> node_info_list;
|
||||
const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const override {
|
||||
static std::unordered_map<NodeID, rpc::GcsNodeInfo> node_info_list;
|
||||
return node_info_list;
|
||||
}
|
||||
|
||||
bool IsRemoved(const ClientID &node_id) const override { return false; }
|
||||
bool IsRemoved(const NodeID &node_id) const override { return false; }
|
||||
|
||||
Status AsyncGetResources(
|
||||
const ClientID &node_id,
|
||||
const NodeID &node_id,
|
||||
const gcs::OptionalItemCallback<ResourceMap> &callback) override {
|
||||
return Status::NotImplemented("");
|
||||
}
|
||||
|
||||
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
|
||||
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
|
||||
const gcs::StatusCallback &callback) override {
|
||||
return Status::NotImplemented("");
|
||||
}
|
||||
|
||||
Status AsyncDeleteResources(const ClientID &node_id,
|
||||
Status AsyncDeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names,
|
||||
const gcs::StatusCallback &callback) override {
|
||||
return Status::NotImplemented("");
|
||||
@@ -366,7 +366,7 @@ struct GcsServerMocker {
|
||||
}
|
||||
|
||||
Status AsyncSubscribeHeartbeat(
|
||||
const gcs::SubscribeCallback<ClientID, rpc::HeartbeatTableData> &subscribe,
|
||||
const gcs::SubscribeCallback<NodeID, rpc::HeartbeatTableData> &subscribe,
|
||||
const gcs::StatusCallback &done) override {
|
||||
return Status::NotImplemented("");
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ inline std::shared_ptr<ray::rpc::ActorTableData> CreateActorTableData(
|
||||
|
||||
/// Helper function to produce worker failure data.
|
||||
inline std::shared_ptr<ray::rpc::WorkerTableData> CreateWorkerFailureData(
|
||||
const ClientID &raylet_id, const WorkerID &worker_id, const std::string &address,
|
||||
const NodeID &raylet_id, const WorkerID &worker_id, const std::string &address,
|
||||
int32_t port, int64_t timestamp = std::time(nullptr),
|
||||
bool intentional_disconnect = false) {
|
||||
auto worker_failure_info_ptr = std::make_shared<ray::rpc::WorkerTableData>();
|
||||
@@ -104,7 +104,7 @@ inline std::shared_ptr<ray::rpc::WorkerTableData> CreateWorkerFailureData(
|
||||
/// \param is_add Whether the object is appeared on the node.
|
||||
/// \return The object location change created by this method.
|
||||
inline std::shared_ptr<ray::rpc::ObjectLocationChange> CreateObjectLocationChange(
|
||||
const ClientID &node_id, bool is_add) {
|
||||
const NodeID &node_id, bool is_add) {
|
||||
ray::rpc::ObjectTableData object_table_data;
|
||||
object_table_data.set_manager(node_id.Binary());
|
||||
auto object_location_change = std::make_shared<ray::rpc::ObjectLocationChange>();
|
||||
|
||||
@@ -139,8 +139,7 @@ Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll(
|
||||
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
return log_based_actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe,
|
||||
done);
|
||||
return log_based_actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::AsyncSubscribe(
|
||||
@@ -316,7 +315,7 @@ Status RedisActorInfoAccessor::AsyncSubscribeAll(
|
||||
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
return actor_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
|
||||
return actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
|
||||
}
|
||||
|
||||
Status RedisActorInfoAccessor::AsyncSubscribe(
|
||||
@@ -361,7 +360,7 @@ Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr<JobTableData> &
|
||||
Status RedisJobInfoAccessor::AsyncSubscribeAll(
|
||||
const SubscribeCallback<JobID, JobTableData> &subscribe, const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
return job_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
|
||||
return job_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
|
||||
}
|
||||
|
||||
RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl)
|
||||
@@ -506,7 +505,7 @@ Status RedisObjectInfoAccessor::AsyncGetLocations(
|
||||
}
|
||||
|
||||
Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
|
||||
const ClientID &node_id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &callback) {
|
||||
std::function<void(RedisGcsClient * client, const ObjectID &id,
|
||||
const ObjectTableData &data)>
|
||||
@@ -524,7 +523,7 @@ Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
|
||||
}
|
||||
|
||||
Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id,
|
||||
const ClientID &node_id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &callback) {
|
||||
std::function<void(RedisGcsClient * client, const ObjectID &id,
|
||||
const ObjectTableData &data)>
|
||||
@@ -569,7 +568,7 @@ Status RedisNodeInfoAccessor::UnregisterSelf() {
|
||||
return client_table.Disconnect();
|
||||
}
|
||||
|
||||
const ClientID &RedisNodeInfoAccessor::GetSelfId() const {
|
||||
const NodeID &RedisNodeInfoAccessor::GetSelfId() const {
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
return client_table.GetLocalClientId();
|
||||
}
|
||||
@@ -583,18 +582,18 @@ Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) {
|
||||
ClientTable::WriteCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &id,
|
||||
const GcsNodeInfo &data) { callback(Status::OK()); };
|
||||
}
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
return client_table.MarkConnected(node_info, on_done);
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
|
||||
Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
|
||||
const StatusCallback &callback) {
|
||||
ClientTable::WriteCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &id,
|
||||
const GcsNodeInfo &data) { callback(Status::OK()); };
|
||||
}
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
@@ -602,8 +601,7 @@ Status RedisNodeInfoAccessor::AsyncUnregister(const ClientID &node_id,
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
return client_table.SubscribeToNodeChange(subscribe, done);
|
||||
@@ -612,7 +610,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
|
||||
Status RedisNodeInfoAccessor::AsyncGetAll(
|
||||
const MultiItemCallback<GcsNodeInfo> &callback) {
|
||||
RAY_CHECK(callback != nullptr);
|
||||
auto on_done = [callback](RedisGcsClient *client, const ClientID &id,
|
||||
auto on_done = [callback](RedisGcsClient *client, const NodeID &id,
|
||||
const std::vector<GcsNodeInfo> &data) {
|
||||
std::vector<GcsNodeInfo> result;
|
||||
std::set<std::string> node_ids;
|
||||
@@ -627,7 +625,7 @@ Status RedisNodeInfoAccessor::AsyncGetAll(
|
||||
return client_table.Lookup(on_done);
|
||||
}
|
||||
|
||||
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const ClientID &node_id) const {
|
||||
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const NodeID &node_id) const {
|
||||
GcsNodeInfo node_info;
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
bool found = client_table.GetClient(node_id, &node_info);
|
||||
@@ -638,12 +636,12 @@ boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const ClientID &node_id)
|
||||
return optional_node;
|
||||
}
|
||||
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
return client_table.GetAllClients();
|
||||
}
|
||||
|
||||
bool RedisNodeInfoAccessor::IsRemoved(const ClientID &node_id) const {
|
||||
bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
|
||||
ClientTable &client_table = client_impl_->client_table();
|
||||
return client_table.IsRemoved(node_id);
|
||||
}
|
||||
@@ -651,11 +649,11 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
|
||||
const std::shared_ptr<HeartbeatTableData> &data_ptr, const StatusCallback &callback) {
|
||||
HeartbeatTable::WriteCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
|
||||
const HeartbeatTableData &data) { callback(Status::OK()); };
|
||||
}
|
||||
|
||||
ClientID node_id = ClientID::FromBinary(data_ptr->client_id());
|
||||
NodeID node_id = NodeID::FromBinary(data_ptr->client_id());
|
||||
HeartbeatTable &heartbeat_table = client_impl_->heartbeat_table();
|
||||
return heartbeat_table.Add(JobID::Nil(), node_id, data_ptr, on_done);
|
||||
}
|
||||
@@ -663,15 +661,14 @@ Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
|
||||
void RedisNodeInfoAccessor::AsyncReReportHeartbeat() {}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncSubscribeHeartbeat(
|
||||
const SubscribeCallback<ClientID, HeartbeatTableData> &subscribe,
|
||||
const SubscribeCallback<NodeID, HeartbeatTableData> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
auto on_subscribe = [subscribe](const ClientID &node_id,
|
||||
const HeartbeatTableData &data) {
|
||||
auto on_subscribe = [subscribe](const NodeID &node_id, const HeartbeatTableData &data) {
|
||||
subscribe(node_id, data);
|
||||
};
|
||||
|
||||
return heartbeat_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done);
|
||||
return heartbeat_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done);
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat(
|
||||
@@ -679,30 +676,30 @@ Status RedisNodeInfoAccessor::AsyncReportBatchHeartbeat(
|
||||
const StatusCallback &callback) {
|
||||
HeartbeatBatchTable::WriteCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
|
||||
const HeartbeatBatchTableData &data) { callback(Status::OK()); };
|
||||
}
|
||||
|
||||
HeartbeatBatchTable &hb_batch_table = client_impl_->heartbeat_batch_table();
|
||||
return hb_batch_table.Add(JobID::Nil(), ClientID::Nil(), data_ptr, on_done);
|
||||
return hb_batch_table.Add(JobID::Nil(), NodeID::Nil(), data_ptr, on_done);
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncSubscribeBatchHeartbeat(
|
||||
const ItemCallback<HeartbeatBatchTableData> &subscribe, const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
auto on_subscribe = [subscribe](const ClientID &node_id,
|
||||
auto on_subscribe = [subscribe](const NodeID &node_id,
|
||||
const HeartbeatBatchTableData &data) {
|
||||
subscribe(data);
|
||||
};
|
||||
|
||||
return heartbeat_batch_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe,
|
||||
return heartbeat_batch_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe,
|
||||
done);
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncGetResources(
|
||||
const ClientID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
|
||||
const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
|
||||
RAY_CHECK(callback != nullptr);
|
||||
auto on_done = [callback](RedisGcsClient *client, const ClientID &id,
|
||||
auto on_done = [callback](RedisGcsClient *client, const NodeID &id,
|
||||
const ResourceMap &data) {
|
||||
boost::optional<ResourceMap> result;
|
||||
if (!data.empty()) {
|
||||
@@ -715,12 +712,12 @@ Status RedisNodeInfoAccessor::AsyncGetResources(
|
||||
return resource_table.Lookup(JobID::Nil(), node_id, on_done);
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id,
|
||||
Status RedisNodeInfoAccessor::AsyncUpdateResources(const NodeID &node_id,
|
||||
const ResourceMap &resources,
|
||||
const StatusCallback &callback) {
|
||||
Hash<ClientID, ResourceTableData>::HashCallback on_done = nullptr;
|
||||
Hash<NodeID, ResourceTableData>::HashCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
|
||||
const ResourceMap &resources) { callback(Status::OK()); };
|
||||
}
|
||||
|
||||
@@ -729,11 +726,11 @@ Status RedisNodeInfoAccessor::AsyncUpdateResources(const ClientID &node_id,
|
||||
}
|
||||
|
||||
Status RedisNodeInfoAccessor::AsyncDeleteResources(
|
||||
const ClientID &node_id, const std::vector<std::string> &resource_names,
|
||||
const NodeID &node_id, const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) {
|
||||
Hash<ClientID, ResourceTableData>::HashRemoveCallback on_done = nullptr;
|
||||
Hash<NodeID, ResourceTableData>::HashRemoveCallback on_done = nullptr;
|
||||
if (callback != nullptr) {
|
||||
on_done = [callback](RedisGcsClient *client, const ClientID &node_id,
|
||||
on_done = [callback](RedisGcsClient *client, const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names) {
|
||||
callback(Status::OK());
|
||||
};
|
||||
@@ -746,7 +743,7 @@ Status RedisNodeInfoAccessor::AsyncDeleteResources(
|
||||
Status RedisNodeInfoAccessor::AsyncSubscribeToResources(
|
||||
const ItemCallback<rpc::NodeResourceChange> &subscribe, const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
auto on_subscribe = [subscribe](const ClientID &id,
|
||||
auto on_subscribe = [subscribe](const NodeID &id,
|
||||
const ResourceChangeNotification &result) {
|
||||
rpc::NodeResourceChange node_resource_change;
|
||||
node_resource_change.set_node_id(id.Binary());
|
||||
@@ -762,7 +759,7 @@ Status RedisNodeInfoAccessor::AsyncSubscribeToResources(
|
||||
}
|
||||
subscribe(node_resource_change);
|
||||
};
|
||||
return resource_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), on_subscribe, done);
|
||||
return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done);
|
||||
}
|
||||
|
||||
RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient *client_impl) {}
|
||||
@@ -795,7 +792,7 @@ Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures(
|
||||
const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
return worker_failure_sub_executor_.AsyncSubscribeAll(ClientID::Nil(), subscribe, done);
|
||||
return worker_failure_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
|
||||
}
|
||||
|
||||
Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure(
|
||||
|
||||
@@ -103,13 +103,13 @@ class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor {
|
||||
|
||||
protected:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
// Use a random ClientID for actor subscription. Because:
|
||||
// If we use ClientID::Nil, GCS will still send all actors' updates to this GCS Client.
|
||||
// Use a random NodeID for actor subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all actors' updates to this GCS Client.
|
||||
// Even if we filter out the irrelevant updates, there will still be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local ClientID, so we use
|
||||
// random ClientID instead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, we use a
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
ClientID subscribe_id_{ClientID::FromRandom()};
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
private:
|
||||
typedef SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>
|
||||
@@ -246,13 +246,13 @@ class RedisTaskInfoAccessor : public TaskInfoAccessor {
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
// Use a random ClientID for task subscription. Because:
|
||||
// If we use ClientID::Nil, GCS will still send all tasks' updates to this GCS Client.
|
||||
// Use a random NodeID for task subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all tasks' updates to this GCS Client.
|
||||
// Even if we filter out the irrelevant updates, there will still be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local ClientID, so we use
|
||||
// random ClientID instead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, we use a
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
ClientID subscribe_id_{ClientID::FromRandom()};
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
typedef SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>
|
||||
TaskSubscriptionExecutor;
|
||||
@@ -280,10 +280,10 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor {
|
||||
return Status::NotImplemented("AsyncGetAll not implemented");
|
||||
}
|
||||
|
||||
Status AsyncAddLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncRemoveLocation(const ObjectID &object_id, const ClientID &node_id,
|
||||
Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeToLocations(
|
||||
@@ -298,13 +298,13 @@ class RedisObjectInfoAccessor : public ObjectInfoAccessor {
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
// Use a random ClientID for object subscription. Because:
|
||||
// If we use ClientID::Nil, GCS will still send all objects' updates to this GCS Client.
|
||||
// Use a random NodeID for object subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all objects' updates to this GCS Client.
|
||||
// Even if we filter out the irrelevant updates, there will still be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local ClientID, so we use
|
||||
// random ClientID instead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, we use a
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
ClientID subscribe_id_{ClientID::FromRandom()};
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
typedef SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>
|
||||
ObjectSubscriptionExecutor;
|
||||
@@ -324,35 +324,34 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
|
||||
|
||||
Status UnregisterSelf() override;
|
||||
|
||||
const ClientID &GetSelfId() const override;
|
||||
const NodeID &GetSelfId() const override;
|
||||
|
||||
const GcsNodeInfo &GetSelfInfo() const override;
|
||||
|
||||
Status AsyncRegister(const GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncUnregister(const ClientID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
|
||||
|
||||
Status AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
boost::optional<GcsNodeInfo> Get(const ClientID &node_id) const override;
|
||||
boost::optional<GcsNodeInfo> Get(const NodeID &node_id) const override;
|
||||
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &GetAll() const override;
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
|
||||
|
||||
bool IsRemoved(const ClientID &node_id) const override;
|
||||
bool IsRemoved(const NodeID &node_id) const override;
|
||||
|
||||
Status AsyncGetResources(const ClientID &node_id,
|
||||
Status AsyncGetResources(const NodeID &node_id,
|
||||
const OptionalItemCallback<ResourceMap> &callback) override;
|
||||
|
||||
Status AsyncUpdateResources(const ClientID &node_id, const ResourceMap &resources,
|
||||
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncDeleteResources(const ClientID &node_id,
|
||||
Status AsyncDeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
@@ -365,7 +364,7 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
|
||||
void AsyncReReportHeartbeat() override;
|
||||
|
||||
Status AsyncSubscribeHeartbeat(
|
||||
const SubscribeCallback<ClientID, HeartbeatTableData> &subscribe,
|
||||
const SubscribeCallback<NodeID, HeartbeatTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncReportBatchHeartbeat(
|
||||
@@ -392,15 +391,15 @@ class RedisNodeInfoAccessor : public NodeInfoAccessor {
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
typedef SubscriptionExecutor<ClientID, ResourceChangeNotification, DynamicResourceTable>
|
||||
typedef SubscriptionExecutor<NodeID, ResourceChangeNotification, DynamicResourceTable>
|
||||
DynamicResourceSubscriptionExecutor;
|
||||
DynamicResourceSubscriptionExecutor resource_sub_executor_;
|
||||
|
||||
typedef SubscriptionExecutor<ClientID, HeartbeatTableData, HeartbeatTable>
|
||||
typedef SubscriptionExecutor<NodeID, HeartbeatTableData, HeartbeatTable>
|
||||
HeartbeatSubscriptionExecutor;
|
||||
HeartbeatSubscriptionExecutor heartbeat_sub_executor_;
|
||||
|
||||
typedef SubscriptionExecutor<ClientID, HeartbeatBatchTableData, HeartbeatBatchTable>
|
||||
typedef SubscriptionExecutor<NodeID, HeartbeatBatchTableData, HeartbeatBatchTable>
|
||||
HeartbeatBatchSubscriptionExecutor;
|
||||
HeartbeatBatchSubscriptionExecutor heartbeat_batch_sub_executor_;
|
||||
};
|
||||
|
||||
@@ -381,7 +381,7 @@ Status RedisContext::RunArgvAsync(const std::vector<std::string> &args,
|
||||
return status;
|
||||
}
|
||||
|
||||
Status RedisContext::SubscribeAsync(const ClientID &client_id,
|
||||
Status RedisContext::SubscribeAsync(const NodeID &client_id,
|
||||
const TablePubsub pubsub_channel,
|
||||
const RedisCallback &redisCallback,
|
||||
int64_t *out_callback_index) {
|
||||
|
||||
@@ -236,7 +236,7 @@ class RedisContext {
|
||||
/// \param redisCallback The callback function that the notification calls.
|
||||
/// \param out_callback_index The output pointer to callback index.
|
||||
/// \return Status.
|
||||
Status SubscribeAsync(const ClientID &client_id, const TablePubsub pubsub_channel,
|
||||
Status SubscribeAsync(const NodeID &client_id, const TablePubsub pubsub_channel,
|
||||
const RedisCallback &redisCallback, int64_t *out_callback_index);
|
||||
|
||||
/// Subscribes the client to the given pattern.
|
||||
|
||||
@@ -48,8 +48,8 @@ Status RedisGcsClient::Connect(boost::asio::io_service &io_service) {
|
||||
log_based_actor_table_.reset(new LogBasedActorTable({primary_context}, this));
|
||||
actor_table_.reset(new ActorTable({primary_context}, this));
|
||||
|
||||
// TODO(micafan) Modify ClientTable' Constructor(remove ClientID) in future.
|
||||
// We will use NodeID instead of ClientID.
|
||||
// TODO(micafan): Modify ClientTable's constructor (remove the old ClientID) in future.
|
||||
// We will use NodeID instead of the old ClientID.
|
||||
// For worker/driver, it might not have this field(NodeID).
|
||||
// For raylet, NodeID should be initialized in raylet layer(not here).
|
||||
client_table_.reset(new ClientTable({primary_context}, this));
|
||||
|
||||
@@ -20,7 +20,7 @@ namespace gcs {
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
|
||||
const ClientID &client_id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const NodeID &client_id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
// TODO(micafan) Optimize the lock when necessary.
|
||||
// Consider avoiding locking in single-threaded processes.
|
||||
@@ -110,9 +110,9 @@ Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
|
||||
const ClientID &client_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const NodeID &client_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(client_id != ClientID::Nil());
|
||||
RAY_CHECK(client_id != NodeID::Nil());
|
||||
|
||||
// NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts,
|
||||
// thus we need to call `RequestNotifications` in the Subscribe callback to ensure
|
||||
@@ -160,7 +160,7 @@ Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncUnsubscribe(
|
||||
const ClientID &client_id, const ID &id, const StatusCallback &done) {
|
||||
const NodeID &client_id, const ID &id, const StatusCallback &done) {
|
||||
SubscribeCallback<ID, Data> subscribe = nullptr;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
@@ -205,11 +205,10 @@ template class SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>;
|
||||
template class SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>;
|
||||
template class SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>,
|
||||
TaskLeaseTable>;
|
||||
template class SubscriptionExecutor<ClientID, ResourceChangeNotification,
|
||||
template class SubscriptionExecutor<NodeID, ResourceChangeNotification,
|
||||
DynamicResourceTable>;
|
||||
template class SubscriptionExecutor<ClientID, HeartbeatTableData, HeartbeatTable>;
|
||||
template class SubscriptionExecutor<ClientID, HeartbeatBatchTableData,
|
||||
HeartbeatBatchTable>;
|
||||
template class SubscriptionExecutor<NodeID, HeartbeatTableData, HeartbeatTable>;
|
||||
template class SubscriptionExecutor<NodeID, HeartbeatBatchTableData, HeartbeatBatchTable>;
|
||||
template class SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>;
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
@@ -46,7 +46,7 @@ class SubscriptionExecutor {
|
||||
/// is registered or updated.
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncSubscribeAll(const ClientID &client_id,
|
||||
Status AsyncSubscribeAll(const NodeID &client_id,
|
||||
const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
@@ -61,7 +61,7 @@ class SubscriptionExecutor {
|
||||
/// is registered or updated.
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncSubscribe(const ClientID &client_id, const ID &id,
|
||||
Status AsyncSubscribe(const NodeID &client_id, const ID &id,
|
||||
const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
@@ -74,7 +74,7 @@ class SubscriptionExecutor {
|
||||
/// \param id The id of the element to be unsubscribed to.
|
||||
/// \param done Callback that will be called when cancel subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncUnsubscribe(const ClientID &client_id, const ID &id,
|
||||
Status AsyncUnsubscribe(const NodeID &client_id, const ID &id,
|
||||
const StatusCallback &done);
|
||||
|
||||
private:
|
||||
|
||||
+25
-26
@@ -137,7 +137,7 @@ Status Log<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
auto subscribe_wrapper = [subscribe](RedisGcsClient *client, const ID &id,
|
||||
@@ -150,7 +150,7 @@ Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ == -1)
|
||||
@@ -192,7 +192,7 @@ Status Log<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
|
||||
const ClientID &client_id,
|
||||
const NodeID &client_id,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ >= 0)
|
||||
<< "Client requested notifications on a key before Subscribe completed";
|
||||
@@ -214,7 +214,7 @@ Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::CancelNotifications(const JobID &job_id, const ID &id,
|
||||
const ClientID &client_id,
|
||||
const NodeID &client_id,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ >= 0)
|
||||
<< "Client canceled notifications on a key before Subscribe completed";
|
||||
@@ -315,7 +315,7 @@ Status Table<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe,
|
||||
const FailureCallback &failure,
|
||||
const SubscriptionCallback &done) {
|
||||
@@ -336,7 +336,7 @@ Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
return Subscribe(job_id, client_id, subscribe, /*failure*/ nullptr, done);
|
||||
@@ -379,7 +379,7 @@ Status Set<ID, Data>::Remove(const JobID &job_id, const ID &id,
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Set<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Set<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id,
|
||||
@@ -481,7 +481,7 @@ Status Hash<ID, Data>::Lookup(const JobID &job_id, const ID &id,
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ == -1)
|
||||
@@ -551,7 +551,7 @@ void ClientTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback)
|
||||
|
||||
void ClientTable::HandleNotification(RedisGcsClient *client,
|
||||
const GcsNodeInfo &node_info) {
|
||||
ClientID node_id = ClientID::FromBinary(node_info.node_id());
|
||||
NodeID node_id = NodeID::FromBinary(node_info.node_id());
|
||||
bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
|
||||
// It's possible to get duplicate notifications from the client table, so
|
||||
// check whether this notification is new.
|
||||
@@ -598,14 +598,14 @@ void ClientTable::HandleNotification(RedisGcsClient *client,
|
||||
}
|
||||
}
|
||||
|
||||
const ClientID &ClientTable::GetLocalClientId() const {
|
||||
const NodeID &ClientTable::GetLocalClientId() const {
|
||||
RAY_CHECK(!local_node_id_.IsNil());
|
||||
return local_node_id_;
|
||||
}
|
||||
|
||||
const GcsNodeInfo &ClientTable::GetLocalClient() const { return local_node_info_; }
|
||||
|
||||
bool ClientTable::IsRemoved(const ClientID &node_id) const {
|
||||
bool ClientTable::IsRemoved(const NodeID &node_id) const {
|
||||
return removed_nodes_.count(node_id) == 1;
|
||||
}
|
||||
|
||||
@@ -617,7 +617,7 @@ Status ClientTable::Connect(const GcsNodeInfo &local_node_info) {
|
||||
auto node_info_ptr = std::make_shared<GcsNodeInfo>(local_node_info);
|
||||
Status status = SyncAppend(JobID::Nil(), client_log_key_, node_info_ptr);
|
||||
if (status.ok()) {
|
||||
local_node_id_ = ClientID::FromBinary(local_node_info.node_id());
|
||||
local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
|
||||
local_node_info_ = local_node_info;
|
||||
}
|
||||
return status;
|
||||
@@ -642,7 +642,7 @@ ray::Status ClientTable::MarkConnected(const GcsNodeInfo &node_info,
|
||||
return Append(JobID::Nil(), client_log_key_, node_info_ptr, done);
|
||||
}
|
||||
|
||||
ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id,
|
||||
ray::Status ClientTable::MarkDisconnected(const NodeID &dead_node_id,
|
||||
const WriteCallback &done) {
|
||||
auto node_info = std::make_shared<GcsNodeInfo>();
|
||||
node_info->set_node_id(dead_node_id.Binary());
|
||||
@@ -651,8 +651,7 @@ ray::Status ClientTable::MarkDisconnected(const ClientID &dead_node_id,
|
||||
}
|
||||
|
||||
ray::Status ClientTable::SubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
|
||||
// Callback for a notification from the client table.
|
||||
auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key,
|
||||
const std::vector<GcsNodeInfo> &notifications) {
|
||||
@@ -699,7 +698,7 @@ ray::Status ClientTable::SubscribeToNodeChange(
|
||||
return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done);
|
||||
}
|
||||
|
||||
bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) const {
|
||||
bool ClientTable::GetClient(const NodeID &node_id, GcsNodeInfo *node_info) const {
|
||||
RAY_CHECK(!node_id.IsNil());
|
||||
auto entry = node_cache_.find(node_id);
|
||||
auto found = (entry != node_cache_.end());
|
||||
@@ -709,7 +708,7 @@ bool ClientTable::GetClient(const ClientID &node_id, GcsNodeInfo *node_info) con
|
||||
return found;
|
||||
}
|
||||
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &ClientTable::GetAllClients() const {
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &ClientTable::GetAllClients() const {
|
||||
return node_cache_;
|
||||
}
|
||||
|
||||
@@ -720,13 +719,13 @@ Status ClientTable::Lookup(const Callback &lookup) {
|
||||
|
||||
std::string ClientTable::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << Log<ClientID, GcsNodeInfo>::DebugString();
|
||||
result << Log<NodeID, GcsNodeInfo>::DebugString();
|
||||
result << ", cache size: " << node_cache_.size()
|
||||
<< ", num removed: " << removed_nodes_.size();
|
||||
return result.str();
|
||||
}
|
||||
|
||||
Status TaskLeaseTable::Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id,
|
||||
@@ -865,21 +864,21 @@ template class Table<TaskID, TaskTableData>;
|
||||
template class Log<ActorID, ActorTableData>;
|
||||
template class Log<TaskID, TaskReconstructionData>;
|
||||
template class Table<TaskID, TaskLeaseData>;
|
||||
template class Table<ClientID, HeartbeatTableData>;
|
||||
template class Table<ClientID, HeartbeatBatchTableData>;
|
||||
template class Log<ClientID, GcsNodeInfo>;
|
||||
template class Table<NodeID, HeartbeatTableData>;
|
||||
template class Table<NodeID, HeartbeatBatchTableData>;
|
||||
template class Log<NodeID, GcsNodeInfo>;
|
||||
template class Log<JobID, JobTableData>;
|
||||
template class Log<UniqueID, ProfileTableData>;
|
||||
template class Log<ClientID, HeartbeatTableData>;
|
||||
template class Log<ClientID, HeartbeatBatchTableData>;
|
||||
template class Log<NodeID, HeartbeatTableData>;
|
||||
template class Log<NodeID, HeartbeatBatchTableData>;
|
||||
template class Log<WorkerID, WorkerTableData>;
|
||||
template class Table<ActorCheckpointID, ActorCheckpointData>;
|
||||
template class Table<ActorID, ActorCheckpointIdData>;
|
||||
template class Table<WorkerID, WorkerTableData>;
|
||||
template class Table<ActorID, ActorTableData>;
|
||||
|
||||
template class Log<ClientID, ResourceTableData>;
|
||||
template class Hash<ClientID, ResourceTableData>;
|
||||
template class Log<NodeID, ResourceTableData>;
|
||||
template class Hash<NodeID, ResourceTableData>;
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
|
||||
+30
-30
@@ -71,10 +71,10 @@ template <typename ID>
|
||||
class PubsubInterface {
|
||||
public:
|
||||
virtual Status RequestNotifications(const JobID &job_id, const ID &id,
|
||||
const ClientID &client_id,
|
||||
const NodeID &client_id,
|
||||
const StatusCallback &done) = 0;
|
||||
virtual Status CancelNotifications(const JobID &job_id, const ID &id,
|
||||
const ClientID &client_id,
|
||||
const NodeID &client_id,
|
||||
const StatusCallback &done) = 0;
|
||||
virtual ~PubsubInterface(){};
|
||||
};
|
||||
@@ -195,7 +195,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe, const SubscriptionCallback &done);
|
||||
|
||||
/// Request notifications about a key in this table.
|
||||
@@ -214,8 +214,8 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
|
||||
/// notifications can be requested, a call to `Subscribe` to this
|
||||
/// table with the same `client_id` must complete successfully.
|
||||
/// \return Status
|
||||
Status RequestNotifications(const JobID &job_id, const ID &id,
|
||||
const ClientID &client_id, const StatusCallback &done);
|
||||
Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &client_id,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Cancel notifications about a key in this table.
|
||||
///
|
||||
@@ -224,7 +224,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
|
||||
/// \param client_id The client who originally requested notifications.
|
||||
/// \param done Callback that is called when cancel notifications is complete.
|
||||
/// \return Status
|
||||
Status CancelNotifications(const JobID &job_id, const ID &id, const ClientID &client_id,
|
||||
Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &client_id,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Subscribe to any modifications to the key. The caller may choose
|
||||
@@ -245,7 +245,7 @@ class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
@@ -380,7 +380,7 @@ class Table : private Log<ID, Data>,
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe, const FailureCallback &failure,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
@@ -399,7 +399,7 @@ class Table : private Log<ID, Data>,
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe, const SubscriptionCallback &done);
|
||||
|
||||
void Delete(const JobID &job_id, const ID &id) { Log<ID, Data>::Delete(job_id, id); }
|
||||
@@ -499,7 +499,7 @@ class Set : private Log<ID, Data>,
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
@@ -600,7 +600,7 @@ class HashInterface {
|
||||
/// \param done SubscriptionCallback that is called when subscription is complete and
|
||||
/// we are ready to receive messages.
|
||||
/// \return Status
|
||||
virtual Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
virtual Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) = 0;
|
||||
|
||||
@@ -628,7 +628,7 @@ class Hash : private Log<ID, Data>,
|
||||
Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
|
||||
const HashCallback &done) override;
|
||||
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) override;
|
||||
|
||||
@@ -656,7 +656,7 @@ class Hash : private Log<ID, Data>,
|
||||
using Log<ID, Data>::num_lookups_;
|
||||
};
|
||||
|
||||
class DynamicResourceTable : public Hash<ClientID, ResourceTableData> {
|
||||
class DynamicResourceTable : public Hash<NodeID, ResourceTableData> {
|
||||
public:
|
||||
DynamicResourceTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
@@ -680,7 +680,7 @@ class ObjectTable : public Set<ObjectID, ObjectTableData> {
|
||||
virtual ~ObjectTable(){};
|
||||
};
|
||||
|
||||
class HeartbeatTable : public Table<ClientID, HeartbeatTableData> {
|
||||
class HeartbeatTable : public Table<NodeID, HeartbeatTableData> {
|
||||
public:
|
||||
HeartbeatTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
@@ -691,7 +691,7 @@ class HeartbeatTable : public Table<ClientID, HeartbeatTableData> {
|
||||
virtual ~HeartbeatTable() {}
|
||||
};
|
||||
|
||||
class HeartbeatBatchTable : public Table<ClientID, HeartbeatBatchTableData> {
|
||||
class HeartbeatBatchTable : public Table<NodeID, HeartbeatBatchTableData> {
|
||||
public:
|
||||
HeartbeatBatchTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
@@ -807,7 +807,7 @@ class TaskLeaseTable : public Table<TaskID, TaskLeaseData> {
|
||||
|
||||
/// Implement this method for the subscription tools class SubscriptionExecutor.
|
||||
/// In this way TaskLeaseTable() can also reuse class SubscriptionExecutor.
|
||||
Status Subscribe(const JobID &job_id, const ClientID &client_id,
|
||||
Status Subscribe(const JobID &job_id, const NodeID &client_id,
|
||||
const Callback &subscribe, const SubscriptionCallback &done);
|
||||
};
|
||||
|
||||
@@ -882,8 +882,8 @@ class ProfileTable : public Log<UniqueID, ProfileTableData> {
|
||||
/// alive. When a client disconnects, or if another client detects its failure,
|
||||
/// it should append an entry to the log indicating that it is dead. A client
|
||||
/// that is marked as dead should never again be marked as alive; if it needs
|
||||
/// to reconnect, it must connect with a different ClientID.
|
||||
class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
/// to reconnect, it must connect with a different NodeID.
|
||||
class ClientTable : public Log<NodeID, GcsNodeInfo> {
|
||||
public:
|
||||
ClientTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
@@ -920,10 +920,10 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
/// \param done Callback that is called once the node has been marked to
|
||||
/// disconnected.
|
||||
/// \return Status
|
||||
ray::Status MarkDisconnected(const ClientID &dead_node_id, const WriteCallback &done);
|
||||
ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done);
|
||||
|
||||
ray::Status SubscribeToNodeChange(
|
||||
const SubscribeCallback<ClientID, GcsNodeInfo> &subscribe,
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Get a client's information from the cache. The cache only contains
|
||||
@@ -934,12 +934,12 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
/// we have the client in the cache.
|
||||
/// a nil client ID.
|
||||
/// \return Whether the client is in the cache.
|
||||
bool GetClient(const ClientID &client, GcsNodeInfo *node_info) const;
|
||||
bool GetClient(const NodeID &client, GcsNodeInfo *node_info) const;
|
||||
|
||||
/// Get the local client's ID.
|
||||
///
|
||||
/// \return The local client's ID.
|
||||
const ClientID &GetLocalClientId() const;
|
||||
const NodeID &GetLocalClientId() const;
|
||||
|
||||
/// Get the local client's information.
|
||||
///
|
||||
@@ -950,12 +950,12 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
///
|
||||
/// \param node_id The ID of the client to check.
|
||||
/// \return Whether the client with ID node_id is removed.
|
||||
bool IsRemoved(const ClientID &node_id) const;
|
||||
bool IsRemoved(const NodeID &node_id) const;
|
||||
|
||||
/// Get the information of all clients.
|
||||
///
|
||||
/// \return The client ID to client information map.
|
||||
const std::unordered_map<ClientID, GcsNodeInfo> &GetAllClients() const;
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAllClients() const;
|
||||
|
||||
/// Lookup the client data in the client table.
|
||||
///
|
||||
@@ -972,11 +972,11 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
/// The key at which the log of client information is stored. This key must
|
||||
/// be kept the same across all instances of the ClientTable, so that all
|
||||
/// clients append and read from the same key.
|
||||
ClientID client_log_key_;
|
||||
NodeID client_log_key_;
|
||||
|
||||
private:
|
||||
using NodeChangeCallback =
|
||||
std::function<void(const ClientID &id, const GcsNodeInfo &node_info)>;
|
||||
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
|
||||
|
||||
/// Register a callback to call when a new node is added or a node is removed.
|
||||
///
|
||||
@@ -989,20 +989,20 @@ class ClientTable : public Log<ClientID, GcsNodeInfo> {
|
||||
/// Whether this client has called Disconnect().
|
||||
bool disconnected_{false};
|
||||
/// This node's ID. It will be initialized when we call method `Connect(...)`.
|
||||
ClientID local_node_id_;
|
||||
NodeID local_node_id_;
|
||||
/// Information about this node.
|
||||
GcsNodeInfo local_node_info_;
|
||||
/// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and
|
||||
/// RequestNotification.
|
||||
/// The reason for not using `local_node_id_` is because it is only initialized
|
||||
/// for registered nodes.
|
||||
ClientID subscribe_id_{ClientID::FromRandom()};
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
/// The callback to call when a new node is added or a node is removed.
|
||||
NodeChangeCallback node_change_callback_{nullptr};
|
||||
/// A cache for information about all nodes.
|
||||
std::unordered_map<ClientID, GcsNodeInfo> node_cache_;
|
||||
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
|
||||
/// The set of removed nodes.
|
||||
std::unordered_set<ClientID> removed_nodes_;
|
||||
std::unordered_set<NodeID> removed_nodes_;
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
@@ -51,7 +51,7 @@ struct Mocker {
|
||||
bool detached = false,
|
||||
const std::string name = "") {
|
||||
rpc::Address owner_address;
|
||||
owner_address.set_raylet_id(ClientID::FromRandom().Binary());
|
||||
owner_address.set_raylet_id(NodeID::FromRandom().Binary());
|
||||
owner_address.set_ip_address("1234");
|
||||
owner_address.set_port(5678);
|
||||
owner_address.set_worker_id(WorkerID::FromRandom().Binary());
|
||||
@@ -67,7 +67,7 @@ struct Mocker {
|
||||
bool detached = false,
|
||||
const std::string name = "") {
|
||||
rpc::Address owner_address;
|
||||
owner_address.set_raylet_id(ClientID::FromRandom().Binary());
|
||||
owner_address.set_raylet_id(NodeID::FromRandom().Binary());
|
||||
owner_address.set_ip_address("1234");
|
||||
owner_address.set_port(5678);
|
||||
owner_address.set_worker_id(WorkerID::FromRandom().Binary());
|
||||
@@ -109,7 +109,7 @@ struct Mocker {
|
||||
static std::shared_ptr<rpc::GcsNodeInfo> GenNodeInfo(
|
||||
uint16_t port = 0, const std::string address = "127.0.0.1") {
|
||||
auto node = std::make_shared<rpc::GcsNodeInfo>();
|
||||
node->set_node_id(ClientID::FromRandom().Binary());
|
||||
node->set_node_id(NodeID::FromRandom().Binary());
|
||||
node->set_node_manager_port(port);
|
||||
node->set_node_manager_address(address);
|
||||
return node;
|
||||
@@ -158,7 +158,7 @@ struct Mocker {
|
||||
}
|
||||
|
||||
static std::shared_ptr<rpc::ProfileTableData> GenProfileTableData(
|
||||
const ClientID &node_id) {
|
||||
const NodeID &node_id) {
|
||||
auto profile_table_data = std::make_shared<rpc::ProfileTableData>();
|
||||
profile_table_data->set_component_id(node_id.Binary());
|
||||
return profile_table_data;
|
||||
|
||||
@@ -70,7 +70,7 @@ class TestGcs : public ::testing::Test {
|
||||
};
|
||||
|
||||
TestGcs *test;
|
||||
ClientID local_client_id = ClientID::FromRandom();
|
||||
NodeID local_client_id = NodeID::FromRandom();
|
||||
|
||||
class TestGcsWithAsio : public TestGcs {
|
||||
public:
|
||||
@@ -666,7 +666,7 @@ class SetTestHelper {
|
||||
// subscribed, we will append to the key several times and check that we get
|
||||
// notified for each.
|
||||
RAY_CHECK_OK(client->object_table().Subscribe(
|
||||
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
|
||||
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
|
||||
|
||||
// Run the event loop. The loop will only stop if the registered subscription
|
||||
// callback is called (or an assertion failure).
|
||||
@@ -1005,7 +1005,7 @@ class LogSubscribeTestHelper {
|
||||
// subscribed, we will append to the key several times and check that we get
|
||||
// notified for each.
|
||||
RAY_CHECK_OK(client->job_table().Subscribe(
|
||||
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
|
||||
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
|
||||
|
||||
// Run the event loop. The loop will only stop if the registered subscription
|
||||
// callback is called (or an assertion failure).
|
||||
@@ -1187,16 +1187,16 @@ TEST_F(TestGcsWithAsio, TestSetSubscribeCancel) {
|
||||
class ClientTableTestHelper {
|
||||
public:
|
||||
static void ClientTableNotification(std::shared_ptr<gcs::RedisGcsClient> client,
|
||||
const ClientID &client_id, const GcsNodeInfo &data,
|
||||
const NodeID &client_id, const GcsNodeInfo &data,
|
||||
bool is_alive) {
|
||||
ClientID added_id = local_client_id;
|
||||
NodeID added_id = local_client_id;
|
||||
ASSERT_EQ(client_id, added_id);
|
||||
ASSERT_EQ(ClientID::FromBinary(data.node_id()), added_id);
|
||||
ASSERT_EQ(NodeID::FromBinary(data.node_id()), added_id);
|
||||
ASSERT_EQ(data.state() == GcsNodeInfo::ALIVE, is_alive);
|
||||
|
||||
GcsNodeInfo cached_client;
|
||||
ASSERT_TRUE(client->client_table().GetClient(added_id, &cached_client));
|
||||
ASSERT_EQ(ClientID::FromBinary(cached_client.node_id()), added_id);
|
||||
ASSERT_EQ(NodeID::FromBinary(cached_client.node_id()), added_id);
|
||||
ASSERT_EQ(cached_client.state() == GcsNodeInfo::ALIVE, is_alive);
|
||||
}
|
||||
|
||||
@@ -1205,7 +1205,7 @@ class ClientTableTestHelper {
|
||||
// Subscribe to a node gets added and removed. The latter
|
||||
// event will stop the event loop.
|
||||
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
|
||||
[client](const ClientID &id, const GcsNodeInfo &data) {
|
||||
[client](const NodeID &id, const GcsNodeInfo &data) {
|
||||
// TODO(micafan)
|
||||
RAY_LOG(INFO) << "Test alive=" << data.state() << " id=" << id;
|
||||
if (data.state() == GcsNodeInfo::ALIVE) {
|
||||
@@ -1231,7 +1231,7 @@ class ClientTableTestHelper {
|
||||
// Register callbacks for when a client gets added and removed. The latter
|
||||
// event will stop the event loop.
|
||||
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
|
||||
[client](const ClientID &id, const GcsNodeInfo &data) {
|
||||
[client](const NodeID &id, const GcsNodeInfo &data) {
|
||||
if (data.state() == GcsNodeInfo::ALIVE) {
|
||||
ClientTableNotification(client, id, data, /*is_insertion=*/true);
|
||||
// Disconnect from the client table. We should receive a notification
|
||||
@@ -1260,7 +1260,7 @@ class ClientTableTestHelper {
|
||||
// Register callbacks for when a client gets added and removed. The latter
|
||||
// event will stop the event loop.
|
||||
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
|
||||
[client](const ClientID &id, const GcsNodeInfo &data) {
|
||||
[client](const NodeID &id, const GcsNodeInfo &data) {
|
||||
if (data.state() == GcsNodeInfo::ALIVE) {
|
||||
ClientTableNotification(client, id, data, true);
|
||||
} else {
|
||||
@@ -1291,14 +1291,14 @@ class ClientTableTestHelper {
|
||||
// Connect to the client table to start receiving notifications.
|
||||
RAY_CHECK_OK(client->client_table().Connect(local_node_info));
|
||||
// Mark a different client as dead.
|
||||
ClientID dead_client_id = ClientID::FromRandom();
|
||||
NodeID dead_client_id = NodeID::FromRandom();
|
||||
RAY_CHECK_OK(client->client_table().MarkDisconnected(dead_client_id, nullptr));
|
||||
// Make sure we only get a notification for the removal of the client we
|
||||
// marked as dead.
|
||||
RAY_CHECK_OK(client->client_table().SubscribeToNodeChange(
|
||||
[dead_client_id](const UniqueID &id, const GcsNodeInfo &data) {
|
||||
if (data.state() == GcsNodeInfo::DEAD) {
|
||||
ASSERT_EQ(ClientID::FromBinary(data.node_id()), dead_client_id);
|
||||
ASSERT_EQ(NodeID::FromBinary(data.node_id()), dead_client_id);
|
||||
test->Stop();
|
||||
}
|
||||
},
|
||||
@@ -1332,7 +1332,7 @@ class HashTableTestHelper {
|
||||
static void TestHashTable(const JobID &job_id,
|
||||
std::shared_ptr<gcs::RedisGcsClient> client) {
|
||||
uint64_t expected_count = 14;
|
||||
ClientID client_id = ClientID::FromRandom();
|
||||
NodeID client_id = NodeID::FromRandom();
|
||||
// Prepare the first resource map: data_map1.
|
||||
DynamicResourceTable::DataMap data_map1;
|
||||
auto cpu_data = std::make_shared<ResourceTableData>();
|
||||
@@ -1370,7 +1370,7 @@ class HashTableTestHelper {
|
||||
};
|
||||
auto notification_callback =
|
||||
[data_map1, data_map2, compare_test, expected_count](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const std::vector<ResourceChangeNotification> &result) {
|
||||
RAY_CHECK(result.size() == 1);
|
||||
const ResourceChangeNotification &notification = result.back();
|
||||
@@ -1399,13 +1399,13 @@ class HashTableTestHelper {
|
||||
};
|
||||
// Step 0: Subscribe the change of the hash table.
|
||||
RAY_CHECK_OK(client->resource_table().Subscribe(
|
||||
job_id, ClientID::Nil(), notification_callback, subscribe_callback));
|
||||
job_id, NodeID::Nil(), notification_callback, subscribe_callback));
|
||||
RAY_CHECK_OK(client->resource_table().RequestNotifications(job_id, client_id,
|
||||
local_client_id, nullptr));
|
||||
|
||||
// Step 1: Add elements to the hash table.
|
||||
auto update_callback1 = [data_map1, compare_test](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
compare_test(data_map1, callback_data);
|
||||
test->IncrementNumCallbacks();
|
||||
@@ -1413,7 +1413,7 @@ class HashTableTestHelper {
|
||||
RAY_CHECK_OK(
|
||||
client->resource_table().Update(job_id, client_id, data_map1, update_callback1));
|
||||
auto lookup_callback1 = [data_map1, compare_test](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
compare_test(data_map1, callback_data);
|
||||
test->IncrementNumCallbacks();
|
||||
@@ -1423,14 +1423,14 @@ class HashTableTestHelper {
|
||||
// Step 2: Decrease one element, increase one and add a new one.
|
||||
RAY_CHECK_OK(client->resource_table().Update(job_id, client_id, data_map2, nullptr));
|
||||
auto lookup_callback2 = [data_map2, compare_test](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
compare_test(data_map2, callback_data);
|
||||
test->IncrementNumCallbacks();
|
||||
};
|
||||
RAY_CHECK_OK(client->resource_table().Lookup(job_id, client_id, lookup_callback2));
|
||||
std::vector<std::string> delete_keys({"GPU", "CUSTOM", "None-Existent"});
|
||||
auto remove_callback = [delete_keys](RedisGcsClient *client, const ClientID &id,
|
||||
auto remove_callback = [delete_keys](RedisGcsClient *client, const NodeID &id,
|
||||
const std::vector<std::string> &callback_data) {
|
||||
for (size_t i = 0; i < callback_data.size(); ++i) {
|
||||
// All deleting keys exist in this argument even if the key doesn't exist.
|
||||
@@ -1444,7 +1444,7 @@ class HashTableTestHelper {
|
||||
data_map3.erase("GPU");
|
||||
data_map3.erase("CUSTOM");
|
||||
auto lookup_callback3 = [data_map3, compare_test](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
compare_test(data_map3, callback_data);
|
||||
test->IncrementNumCallbacks();
|
||||
@@ -1455,7 +1455,7 @@ class HashTableTestHelper {
|
||||
RAY_CHECK_OK(
|
||||
client->resource_table().Update(job_id, client_id, data_map1, update_callback1));
|
||||
auto lookup_callback4 = [data_map1, compare_test](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
compare_test(data_map1, callback_data);
|
||||
test->IncrementNumCallbacks();
|
||||
@@ -1466,7 +1466,7 @@ class HashTableTestHelper {
|
||||
RAY_CHECK_OK(client->resource_table().RemoveEntries(
|
||||
job_id, client_id, {"GPU", "CPU", "CUSTOM", "None-Existent"}, nullptr));
|
||||
auto lookup_callback5 = [expected_count](
|
||||
RedisGcsClient *client, const ClientID &id,
|
||||
RedisGcsClient *client, const NodeID &id,
|
||||
const DynamicResourceTable::DataMap &callback_data) {
|
||||
ASSERT_EQ(callback_data.size(), 0);
|
||||
test->IncrementNumCallbacks();
|
||||
|
||||
@@ -23,12 +23,12 @@ namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
class NodeDynamicResourceTest : public AccessorTestBase<ClientID, ResourceTableData> {
|
||||
class NodeDynamicResourceTest : public AccessorTestBase<NodeID, ResourceTableData> {
|
||||
protected:
|
||||
typedef NodeInfoAccessor::ResourceMap ResourceMap;
|
||||
virtual void GenTestData() {
|
||||
for (size_t node_index = 0; node_index < node_number_; ++node_index) {
|
||||
ClientID id = ClientID::FromRandom();
|
||||
NodeID id = NodeID::FromRandom();
|
||||
ResourceMap resource_map;
|
||||
for (size_t rs_index = 0; rs_index < resource_type_number_; ++rs_index) {
|
||||
std::shared_ptr<ResourceTableData> rs_data =
|
||||
@@ -44,7 +44,7 @@ class NodeDynamicResourceTest : public AccessorTestBase<ClientID, ResourceTableD
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<ClientID, ResourceMap> id_to_resource_map_;
|
||||
std::unordered_map<NodeID, ResourceMap> id_to_resource_map_;
|
||||
|
||||
size_t node_number_{100};
|
||||
size_t resource_type_number_{5};
|
||||
@@ -59,7 +59,7 @@ TEST_F(NodeDynamicResourceTest, UpdateAndGet) {
|
||||
NodeInfoAccessor &node_accessor = gcs_client_->Nodes();
|
||||
for (const auto &node_rs : id_to_resource_map_) {
|
||||
++pending_count_;
|
||||
const ClientID &id = node_rs.first;
|
||||
const NodeID &id = node_rs.first;
|
||||
// Update
|
||||
Status status = node_accessor.AsyncUpdateResources(
|
||||
node_rs.first, node_rs.second, [this, &node_accessor, id](Status status) {
|
||||
@@ -95,7 +95,7 @@ TEST_F(NodeDynamicResourceTest, Delete) {
|
||||
|
||||
for (const auto &node_rs : id_to_resource_map_) {
|
||||
++pending_count_;
|
||||
const ClientID &id = node_rs.first;
|
||||
const NodeID &id = node_rs.first;
|
||||
// Delete
|
||||
Status status = node_accessor.AsyncDeleteResources(
|
||||
id, resource_to_delete_, [this, &node_accessor, id](Status status) {
|
||||
@@ -128,7 +128,7 @@ TEST_F(NodeDynamicResourceTest, Subscribe) {
|
||||
WaitPendingDone(wait_pending_timeout_);
|
||||
|
||||
auto subscribe = [this](const rpc::NodeResourceChange &notification) {
|
||||
auto id = ClientID::FromBinary(notification.node_id());
|
||||
auto id = NodeID::FromBinary(notification.node_id());
|
||||
RAY_LOG(INFO) << "receive client id=" << id;
|
||||
auto it = id_to_resource_map_.find(id);
|
||||
ASSERT_TRUE(it != id_to_resource_map_.end());
|
||||
|
||||
@@ -32,7 +32,7 @@ class RedisObjectInfoAccessorTest : public AccessorTestBase<ObjectID, ObjectTabl
|
||||
ObjectVector object_vec;
|
||||
for (size_t j = 0; j < copy_count_; ++j) {
|
||||
auto object = std::make_shared<ObjectTableData>();
|
||||
ClientID node_id = ClientID::FromRandom();
|
||||
NodeID node_id = NodeID::FromRandom();
|
||||
object->set_manager(node_id.Binary());
|
||||
object_vec.emplace_back(std::move(object));
|
||||
}
|
||||
@@ -55,7 +55,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
|
||||
for (const auto &elem : object_id_to_data_) {
|
||||
for (const auto &item : elem.second) {
|
||||
++pending_count_;
|
||||
ClientID node_id = ClientID::FromBinary(item->manager());
|
||||
NodeID node_id = NodeID::FromBinary(item->manager());
|
||||
RAY_CHECK_OK(
|
||||
object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) {
|
||||
RAY_CHECK_OK(status);
|
||||
@@ -113,7 +113,7 @@ TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
|
||||
++pending_count_;
|
||||
++sub_pending_count;
|
||||
const ObjectVector &object_vec = elem.second;
|
||||
ClientID node_id = ClientID::FromBinary(object_vec[0]->manager());
|
||||
NodeID node_id = NodeID::FromBinary(object_vec[0]->manager());
|
||||
RAY_CHECK_OK(
|
||||
object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) {
|
||||
RAY_CHECK_OK(status);
|
||||
|
||||
@@ -103,12 +103,12 @@ class SubscriptionExecutorTest : public AccessorTestBase<ActorID, ActorTableData
|
||||
TEST_F(SubscriptionExecutorTest, SubscribeAllTest) {
|
||||
++do_sub_pending_count_;
|
||||
Status status =
|
||||
actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
|
||||
actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
|
||||
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
|
||||
ASSERT_TRUE(status.ok());
|
||||
sub_pending_count_ = id_to_data_.size();
|
||||
AsyncRegisterActorToGcs();
|
||||
status = actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
|
||||
status = actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
|
||||
ASSERT_TRUE(status.IsInvalid());
|
||||
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
|
||||
}
|
||||
@@ -117,13 +117,13 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneWithClientIDTest) {
|
||||
const auto &item = id_to_data_.begin();
|
||||
++do_sub_pending_count_;
|
||||
++sub_pending_count_;
|
||||
Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
|
||||
Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
|
||||
subscribe_, sub_done_);
|
||||
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
|
||||
ASSERT_TRUE(status.ok());
|
||||
AsyncRegisterActorToGcs();
|
||||
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
|
||||
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
|
||||
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
|
||||
subscribe_, sub_done_);
|
||||
ASSERT_TRUE(status.IsInvalid());
|
||||
}
|
||||
@@ -133,12 +133,12 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT
|
||||
++do_sub_pending_count_;
|
||||
++sub_pending_count_;
|
||||
AsyncRegisterActorToGcs();
|
||||
Status status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
|
||||
Status status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
|
||||
subscribe_, sub_done_);
|
||||
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
|
||||
ASSERT_TRUE(status.ok());
|
||||
WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
|
||||
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item->first,
|
||||
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item->first,
|
||||
subscribe_, sub_done_);
|
||||
ASSERT_TRUE(status.IsInvalid());
|
||||
}
|
||||
@@ -146,11 +146,11 @@ TEST_F(SubscriptionExecutorTest, SubscribeOneAfterActorRegistrationWithClientIDT
|
||||
TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) {
|
||||
++do_sub_pending_count_;
|
||||
Status status =
|
||||
actor_sub_executor_->AsyncSubscribeAll(ClientID::Nil(), subscribe_, sub_done_);
|
||||
actor_sub_executor_->AsyncSubscribeAll(NodeID::Nil(), subscribe_, sub_done_);
|
||||
ASSERT_TRUE(status.ok());
|
||||
WaitPendingDone(do_sub_pending_count_, wait_pending_timeout_);
|
||||
for (const auto &item : id_to_data_) {
|
||||
status = actor_sub_executor_->AsyncSubscribe(ClientID::FromRandom(), item.first,
|
||||
status = actor_sub_executor_->AsyncSubscribe(NodeID::FromRandom(), item.first,
|
||||
subscribe_, sub_done_);
|
||||
ASSERT_FALSE(status.ok());
|
||||
}
|
||||
@@ -160,7 +160,7 @@ TEST_F(SubscriptionExecutorTest, SubscribeAllAndSubscribeOneTest) {
|
||||
}
|
||||
|
||||
TEST_F(SubscriptionExecutorTest, UnsubscribeTest) {
|
||||
ClientID client_id = ClientID::FromRandom();
|
||||
NodeID client_id = NodeID::FromRandom();
|
||||
Status status;
|
||||
for (const auto &item : id_to_data_) {
|
||||
status = actor_sub_executor_->AsyncUnsubscribe(client_id, item.first, unsub_done_);
|
||||
|
||||
@@ -32,13 +32,13 @@ using ray::rpc::ObjectTableData;
|
||||
bool UpdateObjectLocations(bool is_added,
|
||||
const std::vector<ObjectTableData> &location_updates,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client,
|
||||
std::unordered_set<ClientID> *node_ids) {
|
||||
std::unordered_set<NodeID> *node_ids) {
|
||||
// location_updates contains the updates of locations of the object.
|
||||
// with GcsChangeMode, we can determine whether the update mode is
|
||||
// addition or deletion.
|
||||
bool isUpdated = false;
|
||||
for (const auto &object_table_data : location_updates) {
|
||||
ClientID node_id = ClientID::FromBinary(object_table_data.manager());
|
||||
NodeID node_id = NodeID::FromBinary(object_table_data.manager());
|
||||
if (is_added && 0 == node_ids->count(node_id)) {
|
||||
node_ids->insert(node_id);
|
||||
isUpdated = true;
|
||||
@@ -62,7 +62,7 @@ bool UpdateObjectLocations(bool is_added,
|
||||
} // namespace
|
||||
|
||||
ray::Status ObjectDirectory::ReportObjectAdded(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) {
|
||||
RAY_LOG(DEBUG) << "Reporting object added to GCS " << object_id;
|
||||
ray::Status status =
|
||||
@@ -71,7 +71,7 @@ ray::Status ObjectDirectory::ReportObjectAdded(
|
||||
}
|
||||
|
||||
ray::Status ObjectDirectory::ReportObjectRemoved(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) {
|
||||
RAY_LOG(DEBUG) << "Reporting object removed to GCS " << object_id;
|
||||
ray::Status status =
|
||||
@@ -83,7 +83,7 @@ void ObjectDirectory::LookupRemoteConnectionInfo(
|
||||
RemoteConnectionInfo &connection_info) const {
|
||||
auto node_info = gcs_client_->Nodes().Get(connection_info.client_id);
|
||||
if (node_info) {
|
||||
ClientID result_node_id = ClientID::FromBinary(node_info->node_id());
|
||||
NodeID result_node_id = NodeID::FromBinary(node_info->node_id());
|
||||
RAY_CHECK(result_node_id == connection_info.client_id);
|
||||
if (node_info->state() == GcsNodeInfo::ALIVE) {
|
||||
connection_info.ip = node_info->node_manager_address();
|
||||
@@ -105,7 +105,7 @@ std::vector<RemoteConnectionInfo> ObjectDirectory::LookupAllRemoteConnections()
|
||||
return remote_connections;
|
||||
}
|
||||
|
||||
void ObjectDirectory::HandleClientRemoved(const ClientID &client_id) {
|
||||
void ObjectDirectory::HandleClientRemoved(const NodeID &client_id) {
|
||||
for (auto &listener : listeners_) {
|
||||
const ObjectID &object_id = listener.first;
|
||||
if (listener.second.current_object_locations.count(client_id) > 0) {
|
||||
@@ -224,7 +224,7 @@ ray::Status ObjectDirectory::LookupLocations(const ObjectID &object_id,
|
||||
RAY_CHECK(status.ok())
|
||||
<< "Failed to get object location from GCS: " << status.message();
|
||||
// Build the set of current locations based on the entries in the log.
|
||||
std::unordered_set<ClientID> node_ids;
|
||||
std::unordered_set<NodeID> node_ids;
|
||||
UpdateObjectLocations(/*is_added*/ true, location_updates, gcs_client_,
|
||||
&node_ids);
|
||||
// It is safe to call the callback directly since this is already running
|
||||
|
||||
@@ -29,13 +29,13 @@ namespace ray {
|
||||
|
||||
/// Connection information for remote object managers.
|
||||
struct RemoteConnectionInfo {
|
||||
RemoteConnectionInfo(const ClientID &id) : client_id(id) {}
|
||||
RemoteConnectionInfo(const NodeID &id) : client_id(id) {}
|
||||
|
||||
// Returns whether there is enough information to connect to the remote
|
||||
// object manager.
|
||||
bool Connected() const { return !ip.empty(); }
|
||||
|
||||
ClientID client_id;
|
||||
NodeID client_id;
|
||||
std::string ip;
|
||||
uint16_t port;
|
||||
};
|
||||
@@ -60,7 +60,7 @@ class ObjectDirectoryInterface {
|
||||
|
||||
/// Callback for object location notifications.
|
||||
using OnLocationsFound = std::function<void(const ray::ObjectID &object_id,
|
||||
const std::unordered_set<ray::ClientID> &)>;
|
||||
const std::unordered_set<ray::NodeID> &)>;
|
||||
|
||||
/// Lookup object locations. Callback may be invoked with empty list of client ids.
|
||||
///
|
||||
@@ -76,9 +76,9 @@ class ObjectDirectoryInterface {
|
||||
/// location, and fires the subscribed callbacks for those objects.
|
||||
///
|
||||
/// \param client_id The object manager client that was removed.
|
||||
virtual void HandleClientRemoved(const ClientID &client_id) = 0;
|
||||
virtual void HandleClientRemoved(const NodeID &client_id) = 0;
|
||||
|
||||
/// Subscribe to be notified of locations (ClientID) of the given object.
|
||||
/// Subscribe to be notified of locations (NodeID) of the given object.
|
||||
/// The callback will be invoked with the complete list of known locations
|
||||
/// whenever the set of locations changes. The callback will also be fired if
|
||||
/// the list of known locations is empty. The callback provided to this
|
||||
@@ -113,7 +113,7 @@ class ObjectDirectoryInterface {
|
||||
/// \param object_info Additional information about the object.
|
||||
/// \return Status of whether this method succeeded.
|
||||
virtual ray::Status ReportObjectAdded(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) = 0;
|
||||
|
||||
/// Report objects removed from this client's store to the object directory.
|
||||
@@ -123,7 +123,7 @@ class ObjectDirectoryInterface {
|
||||
/// \param object_info Additional information about the object.
|
||||
/// \return Status of whether this method succeeded.
|
||||
virtual ray::Status ReportObjectRemoved(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) = 0;
|
||||
|
||||
/// Returns debug string for class.
|
||||
@@ -154,7 +154,7 @@ class ObjectDirectory : public ObjectDirectoryInterface {
|
||||
const rpc::Address &owner_address,
|
||||
const OnLocationsFound &callback) override;
|
||||
|
||||
void HandleClientRemoved(const ClientID &client_id) override;
|
||||
void HandleClientRemoved(const NodeID &client_id) override;
|
||||
|
||||
ray::Status SubscribeObjectLocations(const UniqueID &callback_id,
|
||||
const ObjectID &object_id,
|
||||
@@ -164,10 +164,10 @@ class ObjectDirectory : public ObjectDirectoryInterface {
|
||||
const ObjectID &object_id) override;
|
||||
|
||||
ray::Status ReportObjectAdded(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) override;
|
||||
ray::Status ReportObjectRemoved(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) override;
|
||||
|
||||
std::string DebugString() const override;
|
||||
@@ -181,7 +181,7 @@ class ObjectDirectory : public ObjectDirectoryInterface {
|
||||
/// The callback to invoke when object locations are found.
|
||||
std::unordered_map<UniqueID, OnLocationsFound> callbacks;
|
||||
/// The current set of known locations of this object.
|
||||
std::unordered_set<ClientID> current_object_locations;
|
||||
std::unordered_set<NodeID> current_object_locations;
|
||||
/// This flag will get set to true if received any notification of the object.
|
||||
/// It means current_object_locations is up-to-date with GCS. It
|
||||
/// should never go back to false once set to true. If this is true, and
|
||||
|
||||
@@ -48,7 +48,7 @@ ObjectStoreRunner::~ObjectStoreRunner() {
|
||||
}
|
||||
}
|
||||
|
||||
ObjectManager::ObjectManager(asio::io_service &main_service, const ClientID &self_node_id,
|
||||
ObjectManager::ObjectManager(asio::io_service &main_service, const NodeID &self_node_id,
|
||||
const ObjectManagerConfig &config,
|
||||
std::shared_ptr<ObjectDirectoryInterface> object_directory)
|
||||
: self_node_id_(self_node_id),
|
||||
@@ -184,7 +184,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id,
|
||||
// no ordering guarantee between notifications.
|
||||
return object_directory_->SubscribeObjectLocations(
|
||||
object_directory_pull_callback_id_, object_id, owner_address,
|
||||
[this](const ObjectID &object_id, const std::unordered_set<ClientID> &client_ids) {
|
||||
[this](const ObjectID &object_id, const std::unordered_set<NodeID> &client_ids) {
|
||||
// Exit if the Pull request has already been fulfilled or canceled.
|
||||
auto it = pull_requests_.find(object_id);
|
||||
if (it == pull_requests_.end()) {
|
||||
@@ -195,7 +195,7 @@ ray::Status ObjectManager::Pull(const ObjectID &object_id,
|
||||
// we may end up sending a duplicate request to the same client as
|
||||
// before.
|
||||
it->second.client_locations =
|
||||
std::vector<ClientID>(client_ids.begin(), client_ids.end());
|
||||
std::vector<NodeID>(client_ids.begin(), client_ids.end());
|
||||
if (it->second.client_locations.empty()) {
|
||||
// The object locations are now empty, so we should wait for the next
|
||||
// notification about a new object location. Cancel the timer until
|
||||
@@ -244,7 +244,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) {
|
||||
// Generate a random index.
|
||||
std::uniform_int_distribution<int> distribution(0, node_vector.size() - 1);
|
||||
int node_index = distribution(gen_);
|
||||
ClientID node_id = node_vector[node_index];
|
||||
NodeID node_id = node_vector[node_index];
|
||||
// If the object manager somehow ended up choosing itself, choose a different
|
||||
// object manager.
|
||||
if (node_id == self_node_id_) {
|
||||
@@ -309,7 +309,7 @@ void ObjectManager::TryPull(const ObjectID &object_id) {
|
||||
};
|
||||
|
||||
void ObjectManager::SendPullRequest(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
std::shared_ptr<rpc::ObjectManagerClient> rpc_client) {
|
||||
rpc::PullRequest pull_request;
|
||||
pull_request.set_object_id(object_id.Binary());
|
||||
@@ -325,7 +325,7 @@ void ObjectManager::SendPullRequest(
|
||||
}
|
||||
|
||||
void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id,
|
||||
const ClientID &client_id) {
|
||||
const NodeID &client_id) {
|
||||
RAY_LOG(WARNING) << "Invalid Push request ObjectID: " << object_id
|
||||
<< " after waiting for " << config_.push_timeout_ms << " ms.";
|
||||
auto iter = unfulfilled_push_requests_.find(object_id);
|
||||
@@ -337,10 +337,9 @@ void ObjectManager::HandlePushTaskTimeout(const ObjectID &object_id,
|
||||
}
|
||||
}
|
||||
|
||||
void ObjectManager::HandleSendFinished(const ObjectID &object_id,
|
||||
const ClientID &client_id, uint64_t chunk_index,
|
||||
double start_time, double end_time,
|
||||
ray::Status status) {
|
||||
void ObjectManager::HandleSendFinished(const ObjectID &object_id, const NodeID &client_id,
|
||||
uint64_t chunk_index, double start_time,
|
||||
double end_time, ray::Status status) {
|
||||
RAY_LOG(DEBUG) << "HandleSendFinished on " << self_node_id_ << " to " << client_id
|
||||
<< " of object " << object_id << " chunk " << chunk_index
|
||||
<< ", status: " << status.ToString();
|
||||
@@ -363,7 +362,7 @@ void ObjectManager::HandleSendFinished(const ObjectID &object_id,
|
||||
}
|
||||
|
||||
void ObjectManager::HandleReceiveFinished(const ObjectID &object_id,
|
||||
const ClientID &client_id, uint64_t chunk_index,
|
||||
const NodeID &client_id, uint64_t chunk_index,
|
||||
double start_time, double end_time,
|
||||
ray::Status status) {
|
||||
if (!status.ok()) {
|
||||
@@ -385,7 +384,7 @@ void ObjectManager::HandleReceiveFinished(const ObjectID &object_id,
|
||||
profile_events_.push_back(profile_event);
|
||||
}
|
||||
|
||||
void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) {
|
||||
void ObjectManager::Push(const ObjectID &object_id, const NodeID &client_id) {
|
||||
RAY_LOG(DEBUG) << "Push on " << self_node_id_ << " to " << client_id << " of object "
|
||||
<< object_id;
|
||||
if (local_objects_.count(object_id) == 0) {
|
||||
@@ -483,7 +482,7 @@ void ObjectManager::Push(const ObjectID &object_id, const ClientID &client_id) {
|
||||
|
||||
ray::Status ObjectManager::SendObjectChunk(
|
||||
const UniqueID &push_id, const ObjectID &object_id, const rpc::Address &owner_address,
|
||||
const ClientID &client_id, uint64_t data_size, uint64_t metadata_size,
|
||||
const NodeID &client_id, uint64_t data_size, uint64_t metadata_size,
|
||||
uint64_t chunk_index, std::shared_ptr<rpc::ObjectManagerClient> rpc_client) {
|
||||
double start_time = absl::GetCurrentTimeNanos() / 1e9;
|
||||
rpc::PushRequest push_request;
|
||||
@@ -606,7 +605,7 @@ ray::Status ObjectManager::LookupRemainingWaitObjects(const UniqueID &wait_id) {
|
||||
RAY_RETURN_NOT_OK(object_directory_->LookupLocations(
|
||||
object_id, wait_state.owner_addresses[object_id],
|
||||
[this, wait_id](const ObjectID &lookup_object_id,
|
||||
const std::unordered_set<ClientID> &client_ids) {
|
||||
const std::unordered_set<NodeID> &client_ids) {
|
||||
auto &wait_state = active_wait_requests_.find(wait_id)->second;
|
||||
// Note that the object is guaranteed to be added to local_objects_ before
|
||||
// the notification is triggered.
|
||||
@@ -647,7 +646,7 @@ void ObjectManager::SubscribeRemainingWaitObjects(const UniqueID &wait_id) {
|
||||
RAY_CHECK_OK(object_directory_->SubscribeObjectLocations(
|
||||
wait_id, object_id, wait_state.owner_addresses[object_id],
|
||||
[this, wait_id](const ObjectID &subscribe_object_id,
|
||||
const std::unordered_set<ClientID> &client_ids) {
|
||||
const std::unordered_set<NodeID> &client_ids) {
|
||||
auto object_id_wait_state = active_wait_requests_.find(wait_id);
|
||||
if (object_id_wait_state == active_wait_requests_.end()) {
|
||||
// Depending on the timing of calls to the object directory, we
|
||||
@@ -738,7 +737,7 @@ void ObjectManager::WaitComplete(const UniqueID &wait_id) {
|
||||
void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ObjectID object_id = ObjectID::FromBinary(request.object_id());
|
||||
ClientID client_id = ClientID::FromBinary(request.client_id());
|
||||
NodeID client_id = NodeID::FromBinary(request.client_id());
|
||||
|
||||
// Serialize.
|
||||
uint64_t chunk_index = request.chunk_index();
|
||||
@@ -756,7 +755,7 @@ void ObjectManager::HandlePush(const rpc::PushRequest &request, rpc::PushReply *
|
||||
send_reply_callback(status, nullptr, nullptr);
|
||||
}
|
||||
|
||||
ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id,
|
||||
ray::Status ObjectManager::ReceiveObjectChunk(const NodeID &client_id,
|
||||
const ObjectID &object_id,
|
||||
const rpc::Address &owner_address,
|
||||
uint64_t data_size, uint64_t metadata_size,
|
||||
@@ -787,7 +786,7 @@ ray::Status ObjectManager::ReceiveObjectChunk(const ClientID &client_id,
|
||||
void ObjectManager::HandlePull(const rpc::PullRequest &request, rpc::PullReply *reply,
|
||||
rpc::SendReplyCallback send_reply_callback) {
|
||||
ObjectID object_id = ObjectID::FromBinary(request.object_id());
|
||||
ClientID client_id = ClientID::FromBinary(request.client_id());
|
||||
NodeID client_id = NodeID::FromBinary(request.client_id());
|
||||
RAY_LOG(DEBUG) << "Received pull request from client " << client_id << " for object ["
|
||||
<< object_id << "].";
|
||||
|
||||
@@ -855,7 +854,7 @@ void ObjectManager::SpreadFreeObjectsRequest(
|
||||
}
|
||||
|
||||
std::shared_ptr<rpc::ObjectManagerClient> ObjectManager::GetRpcClient(
|
||||
const ClientID &client_id) {
|
||||
const NodeID &client_id) {
|
||||
auto it = remote_object_manager_clients_.find(client_id);
|
||||
if (it == remote_object_manager_clients_.end()) {
|
||||
RemoteConnectionInfo connection_info(client_id);
|
||||
|
||||
@@ -26,10 +26,6 @@
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include <boost/asio.hpp>
|
||||
#include <boost/asio/error.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "absl/container/flat_hash_set.h"
|
||||
#include "absl/time/clock.h"
|
||||
@@ -80,7 +76,7 @@ struct LocalObjectInfo {
|
||||
object_manager::protocol::ObjectInfoT object_info;
|
||||
/// A map from the ID of a remote object manager to the timestamp of when
|
||||
/// the object was last pushed to that object manager (if a push took place).
|
||||
std::unordered_map<ClientID, int64_t> recent_pushes;
|
||||
std::unordered_map<NodeID, int64_t> recent_pushes;
|
||||
};
|
||||
|
||||
class ObjectStoreRunner {
|
||||
@@ -146,9 +142,9 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// \param chunk_index Chunk index of this object chunk, start with 0
|
||||
/// \param rpc_client Rpc client used to send message to remote object manager
|
||||
ray::Status SendObjectChunk(const UniqueID &push_id, const ObjectID &object_id,
|
||||
const rpc::Address &owner_address,
|
||||
const ClientID &client_id, uint64_t data_size,
|
||||
uint64_t metadata_size, uint64_t chunk_index,
|
||||
const rpc::Address &owner_address, const NodeID &client_id,
|
||||
uint64_t data_size, uint64_t metadata_size,
|
||||
uint64_t chunk_index,
|
||||
std::shared_ptr<rpc::ObjectManagerClient> rpc_client);
|
||||
|
||||
/// Receive object chunk from remote object manager, small object may contain one chunk
|
||||
@@ -160,7 +156,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// \param metadata_size Metadata size
|
||||
/// \param chunk_index Chunk index
|
||||
/// \param data Chunk data
|
||||
ray::Status ReceiveObjectChunk(const ClientID &client_id, const ObjectID &object_id,
|
||||
ray::Status ReceiveObjectChunk(const NodeID &client_id, const ObjectID &object_id,
|
||||
const rpc::Address &owner_address, uint64_t data_size,
|
||||
uint64_t metadata_size, uint64_t chunk_index,
|
||||
const std::string &data);
|
||||
@@ -169,13 +165,13 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
///
|
||||
/// \param object_id Object id
|
||||
/// \param client_id Remote server client id
|
||||
void SendPullRequest(const ObjectID &object_id, const ClientID &client_id,
|
||||
void SendPullRequest(const ObjectID &object_id, const NodeID &client_id,
|
||||
std::shared_ptr<rpc::ObjectManagerClient> rpc_client);
|
||||
|
||||
/// Get the rpc client according to the client ID
|
||||
///
|
||||
/// \param client_id Remote client id, will send rpc request to it
|
||||
std::shared_ptr<rpc::ObjectManagerClient> GetRpcClient(const ClientID &client_id);
|
||||
std::shared_ptr<rpc::ObjectManagerClient> GetRpcClient(const NodeID &client_id);
|
||||
|
||||
/// Get the port of the object manager rpc server.
|
||||
int GetServerPort() const { return object_manager_server_.GetPort(); }
|
||||
@@ -189,7 +185,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// \param config ObjectManager configuration.
|
||||
/// \param object_directory An object implementing the object directory interface.
|
||||
explicit ObjectManager(boost::asio::io_service &main_service,
|
||||
const ClientID &self_node_id, const ObjectManagerConfig &config,
|
||||
const NodeID &self_node_id, const ObjectManagerConfig &config,
|
||||
std::shared_ptr<ObjectDirectoryInterface> object_directory);
|
||||
|
||||
~ObjectManager();
|
||||
@@ -221,9 +217,9 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// \param object_id The object's object id.
|
||||
/// \param client_id The remote node's client id.
|
||||
/// \return Void.
|
||||
void Push(const ObjectID &object_id, const ClientID &client_id);
|
||||
void Push(const ObjectID &object_id, const NodeID &client_id);
|
||||
|
||||
/// Pull an object from ClientID.
|
||||
/// Pull an object from NodeID.
|
||||
///
|
||||
/// \param object_id The object's object id.
|
||||
/// \return Status of whether the pull request successfully initiated.
|
||||
@@ -294,7 +290,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
PullRequest() : retry_timer(nullptr), timer_set(false), client_locations() {}
|
||||
std::unique_ptr<boost::asio::deadline_timer> retry_timer;
|
||||
bool timer_set;
|
||||
std::vector<ClientID> client_locations;
|
||||
std::vector<NodeID> client_locations;
|
||||
};
|
||||
|
||||
struct WaitState {
|
||||
@@ -377,7 +373,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// chunk.
|
||||
/// \param status The status of the send (e.g., did it succeed or fail).
|
||||
/// \return Void.
|
||||
void HandleSendFinished(const ObjectID &object_id, const ClientID &client_id,
|
||||
void HandleSendFinished(const ObjectID &object_id, const NodeID &client_id,
|
||||
uint64_t chunk_index, double start_time_us, double end_time_us,
|
||||
ray::Status status);
|
||||
|
||||
@@ -393,14 +389,14 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// chunk.
|
||||
/// \param status The status of the receive (e.g., did it succeed or fail).
|
||||
/// \return Void.
|
||||
void HandleReceiveFinished(const ObjectID &object_id, const ClientID &client_id,
|
||||
void HandleReceiveFinished(const ObjectID &object_id, const NodeID &client_id,
|
||||
uint64_t chunk_index, double start_time_us,
|
||||
double end_time_us, ray::Status status);
|
||||
|
||||
/// Handle Push task timeout.
|
||||
void HandlePushTaskTimeout(const ObjectID &object_id, const ClientID &client_id);
|
||||
void HandlePushTaskTimeout(const ObjectID &object_id, const NodeID &client_id);
|
||||
|
||||
ClientID self_node_id_;
|
||||
NodeID self_node_id_;
|
||||
const ObjectManagerConfig config_;
|
||||
std::shared_ptr<ObjectDirectoryInterface> object_directory_;
|
||||
// Object store runner.
|
||||
@@ -439,8 +435,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
/// Maintains a map of push requests that have not been fulfilled due to an object not
|
||||
/// being local. Objects are removed from this map after push_timeout_ms have elapsed.
|
||||
std::unordered_map<
|
||||
ObjectID,
|
||||
std::unordered_map<ClientID, std::unique_ptr<boost::asio::deadline_timer>>>
|
||||
ObjectID, std::unordered_map<NodeID, std::unique_ptr<boost::asio::deadline_timer>>>
|
||||
unfulfilled_push_requests_;
|
||||
|
||||
/// The objects that this object manager is currently trying to fetch from
|
||||
@@ -468,7 +463,7 @@ class ObjectManager : public ObjectManagerInterface,
|
||||
rpc::ClientCallManager client_call_manager_;
|
||||
|
||||
/// Client id - object manager gRPC client.
|
||||
std::unordered_map<ClientID, std::shared_ptr<rpc::ObjectManagerClient>>
|
||||
std::unordered_map<NodeID, std::shared_ptr<rpc::ObjectManagerClient>>
|
||||
remote_object_manager_clients_;
|
||||
|
||||
/// Running sum of the amount of memory used in the object store.
|
||||
|
||||
@@ -24,7 +24,7 @@ namespace {
|
||||
|
||||
/// Filter out the removed clients from the object locations.
|
||||
void FilterRemovedClients(std::shared_ptr<gcs::GcsClient> gcs_client,
|
||||
std::unordered_set<ClientID> *node_ids) {
|
||||
std::unordered_set<NodeID> *node_ids) {
|
||||
for (auto it = node_ids->begin(); it != node_ids->end();) {
|
||||
if (gcs_client->Nodes().IsRemoved(*it)) {
|
||||
it = node_ids->erase(it);
|
||||
@@ -64,7 +64,7 @@ std::shared_ptr<rpc::CoreWorkerClient> OwnershipBasedObjectDirectory::GetClient(
|
||||
}
|
||||
|
||||
ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) {
|
||||
WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id);
|
||||
rpc::Address owner_address = GetOwnerAddressFromObjectInfo(object_info);
|
||||
@@ -91,7 +91,7 @@ ray::Status OwnershipBasedObjectDirectory::ReportObjectAdded(
|
||||
}
|
||||
|
||||
ray::Status OwnershipBasedObjectDirectory::ReportObjectRemoved(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) {
|
||||
WorkerID worker_id = WorkerID::FromBinary(object_info.owner_worker_id);
|
||||
rpc::Address owner_address = GetOwnerAddressFromObjectInfo(object_info);
|
||||
@@ -126,9 +126,9 @@ void OwnershipBasedObjectDirectory::SubscriptionCallback(
|
||||
return;
|
||||
}
|
||||
|
||||
std::unordered_set<ClientID> client_ids;
|
||||
std::unordered_set<NodeID> client_ids;
|
||||
for (auto const &client_id : reply.client_ids()) {
|
||||
client_ids.emplace(ClientID::FromBinary(client_id));
|
||||
client_ids.emplace(NodeID::FromBinary(client_id));
|
||||
}
|
||||
FilterRemovedClients(gcs_client_, &client_ids);
|
||||
if (client_ids != it->second.current_object_locations) {
|
||||
@@ -208,7 +208,7 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations(
|
||||
RAY_LOG(WARNING) << "Object " << object_id << " does not have owner. "
|
||||
<< "LookupLocations returns an empty list of locations.";
|
||||
io_service_.post(
|
||||
[callback, object_id]() { callback(object_id, std::unordered_set<ClientID>()); });
|
||||
[callback, object_id]() { callback(object_id, std::unordered_set<NodeID>()); });
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@@ -223,9 +223,9 @@ ray::Status OwnershipBasedObjectDirectory::LookupLocations(
|
||||
RAY_LOG(ERROR) << "Worker " << worker_id << " failed to get the location for "
|
||||
<< object_id;
|
||||
}
|
||||
std::unordered_set<ClientID> client_ids;
|
||||
std::unordered_set<NodeID> client_ids;
|
||||
for (auto const &client_id : reply.client_ids()) {
|
||||
client_ids.emplace(ClientID::FromBinary(client_id));
|
||||
client_ids.emplace(NodeID::FromBinary(client_id));
|
||||
}
|
||||
FilterRemovedClients(gcs_client_, &client_ids);
|
||||
callback(object_id, client_ids);
|
||||
|
||||
@@ -56,10 +56,10 @@ class OwnershipBasedObjectDirectory : public ObjectDirectory {
|
||||
const ObjectID &object_id) override;
|
||||
|
||||
ray::Status ReportObjectAdded(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) override;
|
||||
ray::Status ReportObjectRemoved(
|
||||
const ObjectID &object_id, const ClientID &client_id,
|
||||
const ObjectID &object_id, const NodeID &client_id,
|
||||
const object_manager::protocol::ObjectInfoT &object_info) override;
|
||||
|
||||
std::string DebugString() const override;
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
namespace plasma {
|
||||
|
||||
using ray::ObjectID;
|
||||
using ray::ClientID;
|
||||
using ray::NodeID;
|
||||
using ray::WorkerID;
|
||||
|
||||
enum class ObjectLocation : int32_t { Local, Remote, Nonexistent };
|
||||
@@ -81,7 +81,7 @@ struct ObjectTableEntry {
|
||||
/// Number of clients currently using this object.
|
||||
int ref_count;
|
||||
/// Owner's raylet ID.
|
||||
ClientID owner_raylet_id;
|
||||
NodeID owner_raylet_id;
|
||||
/// Owner's IP address.
|
||||
std::string owner_ip_address;
|
||||
/// Owner's port.
|
||||
|
||||
@@ -206,7 +206,7 @@ Status SendCreateRequest(const std::shared_ptr<StoreConn> &store_conn, ObjectID
|
||||
}
|
||||
|
||||
Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
|
||||
ClientID* owner_raylet_id, std::string* owner_ip_address,
|
||||
NodeID* owner_raylet_id, std::string* owner_ip_address,
|
||||
int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full,
|
||||
int64_t* data_size, int64_t* metadata_size,
|
||||
int* device_num) {
|
||||
@@ -217,7 +217,7 @@ Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
|
||||
*data_size = message->data_size();
|
||||
*metadata_size = message->metadata_size();
|
||||
*object_id = ObjectID::FromBinary(message->object_id()->str());
|
||||
*owner_raylet_id = ClientID::FromBinary(message->owner_raylet_id()->str());
|
||||
*owner_raylet_id = NodeID::FromBinary(message->owner_raylet_id()->str());
|
||||
*owner_ip_address = message->owner_ip_address()->str();
|
||||
*owner_port = message->owner_port();
|
||||
*owner_worker_id = WorkerID::FromBinary(message->owner_worker_id()->str());
|
||||
|
||||
@@ -85,7 +85,7 @@ Status SendCreateRequest(const std::shared_ptr<StoreConn> &store_conn, ObjectID
|
||||
int64_t data_size, int64_t metadata_size, int device_num);
|
||||
|
||||
Status ReadCreateRequest(uint8_t* data, size_t size, ObjectID* object_id,
|
||||
ClientID* owner_raylet_id, std::string* owner_ip_address,
|
||||
NodeID* owner_raylet_id, std::string* owner_ip_address,
|
||||
int* owner_port, WorkerID* owner_worker_id, bool* evict_if_full,
|
||||
int64_t* data_size, int64_t* metadata_size,
|
||||
int* device_num);
|
||||
|
||||
@@ -227,7 +227,7 @@ Status PlasmaStore::FreeCudaMemory(int device_num, int64_t size, uint8_t* pointe
|
||||
|
||||
// Create a new object buffer in the hash table.
|
||||
PlasmaError PlasmaStore::CreateObject(const ObjectID& object_id,
|
||||
const ClientID& owner_raylet_id,
|
||||
const NodeID& owner_raylet_id,
|
||||
const std::string& owner_ip_address,
|
||||
int owner_port, const WorkerID& owner_worker_id,
|
||||
bool evict_if_full, int64_t data_size,
|
||||
@@ -884,7 +884,7 @@ Status PlasmaStore::ProcessMessage(const std::shared_ptr<Client> &client,
|
||||
// Process the different types of requests.
|
||||
switch (type) {
|
||||
case fb::MessageType::PlasmaCreateRequest: {
|
||||
ClientID owner_raylet_id;
|
||||
NodeID owner_raylet_id;
|
||||
std::string owner_ip_address;
|
||||
int owner_port;
|
||||
WorkerID owner_worker_id;
|
||||
|
||||
@@ -94,7 +94,7 @@ class PlasmaStore {
|
||||
/// - PlasmaError::OutOfMemory, if the store is out of memory and
|
||||
/// cannot create the object. In this case, the client should not call
|
||||
/// plasma_release.
|
||||
PlasmaError CreateObject(const ObjectID& object_id, const ClientID& owner_raylet_id,
|
||||
PlasmaError CreateObject(const ObjectID& object_id, const NodeID& owner_raylet_id,
|
||||
const std::string& owner_ip_address, int owner_port,
|
||||
const WorkerID& owner_worker_id, bool evict_if_full,
|
||||
int64_t data_size, int64_t metadata_size, int device_num,
|
||||
|
||||
@@ -50,7 +50,7 @@ class MockServer {
|
||||
MockServer(boost::asio::io_service &main_service,
|
||||
const ObjectManagerConfig &object_manager_config,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client)
|
||||
: node_id_(ClientID::FromRandom()),
|
||||
: node_id_(NodeID::FromRandom()),
|
||||
config_(object_manager_config),
|
||||
gcs_client_(gcs_client),
|
||||
object_manager_(main_service, node_id_, object_manager_config,
|
||||
@@ -75,7 +75,7 @@ class MockServer {
|
||||
|
||||
friend class StressTestObjectManager;
|
||||
|
||||
ClientID node_id_;
|
||||
NodeID node_id_;
|
||||
ObjectManagerConfig config_;
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client_;
|
||||
ObjectManager object_manager_;
|
||||
@@ -199,8 +199,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
|
||||
int num_connected_clients = 0;
|
||||
|
||||
ClientID node_id_1;
|
||||
ClientID node_id_2;
|
||||
NodeID node_id_1;
|
||||
NodeID node_id_2;
|
||||
|
||||
int64_t start_time;
|
||||
|
||||
@@ -208,7 +208,7 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
|
||||
[this](const ClientID &node_id, const GcsNodeInfo &data) {
|
||||
[this](const NodeID &node_id, const GcsNodeInfo &data) {
|
||||
if (node_id == node_id_1 || node_id == node_id_2) {
|
||||
num_connected_clients += 1;
|
||||
}
|
||||
@@ -218,7 +218,7 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
},
|
||||
nullptr));
|
||||
RAY_CHECK_OK(gcs_client_2->Nodes().AsyncSubscribeToNodeChange(
|
||||
[this](const ClientID &node_id, const GcsNodeInfo &data) {
|
||||
[this](const NodeID &node_id, const GcsNodeInfo &data) {
|
||||
if (node_id == node_id_1 || node_id == node_id_2) {
|
||||
num_connected_clients += 1;
|
||||
}
|
||||
@@ -310,8 +310,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
|
||||
void TransferTestExecute(int num_trials, int64_t data_size,
|
||||
TransferPattern transfer_pattern) {
|
||||
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
|
||||
ray::Status status = ray::Status::OK();
|
||||
|
||||
@@ -388,8 +388,8 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
RAY_LOG(DEBUG) << "\n"
|
||||
<< "Server node ids:"
|
||||
<< "\n";
|
||||
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
RAY_LOG(DEBUG) << "Server 1: " << node_id_1 << "\n"
|
||||
<< "Server 2: " << node_id_2;
|
||||
|
||||
@@ -397,11 +397,11 @@ class StressTestObjectManager : public TestObjectManagerBase {
|
||||
<< "All connected nodes:"
|
||||
<< "\n";
|
||||
auto data = gcs_client_1->Nodes().Get(node_id_1);
|
||||
RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data->node_id()) << "\n"
|
||||
RAY_LOG(DEBUG) << "NodeID=" << NodeID::FromBinary(data->node_id()) << "\n"
|
||||
<< "NodeIp=" << data->node_manager_address() << "\n"
|
||||
<< "NodePort=" << data->node_manager_port();
|
||||
auto data2 = gcs_client_1->Nodes().Get(node_id_2);
|
||||
RAY_LOG(DEBUG) << "NodeID=" << ClientID::FromBinary(data2->node_id()) << "\n"
|
||||
RAY_LOG(DEBUG) << "NodeID=" << NodeID::FromBinary(data2->node_id()) << "\n"
|
||||
<< "NodeIp=" << data2->node_manager_address() << "\n"
|
||||
<< "NodePort=" << data2->node_manager_port();
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ class MockServer {
|
||||
MockServer(boost::asio::io_service &main_service,
|
||||
const ObjectManagerConfig &object_manager_config,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client)
|
||||
: node_id_(ClientID::FromRandom()),
|
||||
: node_id_(NodeID::FromRandom()),
|
||||
config_(object_manager_config),
|
||||
gcs_client_(gcs_client),
|
||||
object_manager_(main_service, node_id_, object_manager_config,
|
||||
@@ -71,7 +71,7 @@ class MockServer {
|
||||
|
||||
friend class TestObjectManager;
|
||||
|
||||
ClientID node_id_;
|
||||
NodeID node_id_;
|
||||
ObjectManagerConfig config_;
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client_;
|
||||
ObjectManager object_manager_;
|
||||
@@ -180,8 +180,8 @@ class TestObjectManager : public TestObjectManagerBase {
|
||||
public:
|
||||
int current_wait_test = -1;
|
||||
int num_connected_clients = 0;
|
||||
ClientID node_id_1;
|
||||
ClientID node_id_2;
|
||||
NodeID node_id_1;
|
||||
NodeID node_id_2;
|
||||
|
||||
ObjectID created_object_id1;
|
||||
ObjectID created_object_id2;
|
||||
@@ -192,7 +192,7 @@ class TestObjectManager : public TestObjectManagerBase {
|
||||
node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
RAY_CHECK_OK(gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
|
||||
[this](const ClientID &node_id, const GcsNodeInfo &data) {
|
||||
[this](const NodeID &node_id, const GcsNodeInfo &data) {
|
||||
if (node_id == node_id_1 || node_id == node_id_2) {
|
||||
num_connected_clients += 1;
|
||||
}
|
||||
@@ -264,7 +264,7 @@ class TestObjectManager : public TestObjectManagerBase {
|
||||
sub_id, object_1, rpc::Address(),
|
||||
[this, sub_id, object_1, object_2](
|
||||
const ray::ObjectID &object_id,
|
||||
const std::unordered_set<ray::ClientID> &clients) {
|
||||
const std::unordered_set<ray::NodeID> &clients) {
|
||||
if (!clients.empty()) {
|
||||
TestWaitWhileSubscribed(sub_id, object_1, object_2);
|
||||
}
|
||||
@@ -430,16 +430,16 @@ class TestObjectManager : public TestObjectManagerBase {
|
||||
<< "Server node ids:"
|
||||
<< "\n";
|
||||
auto data = gcs_client_1->Nodes().Get(node_id_1);
|
||||
RAY_LOG(DEBUG) << (ClientID::FromBinary(data->node_id()).IsNil());
|
||||
RAY_LOG(DEBUG) << "Server 1 NodeID=" << ClientID::FromBinary(data->node_id());
|
||||
RAY_LOG(DEBUG) << (NodeID::FromBinary(data->node_id()).IsNil());
|
||||
RAY_LOG(DEBUG) << "Server 1 NodeID=" << NodeID::FromBinary(data->node_id());
|
||||
RAY_LOG(DEBUG) << "Server 1 NodeIp=" << data->node_manager_address();
|
||||
RAY_LOG(DEBUG) << "Server 1 NodePort=" << data->node_manager_port();
|
||||
ASSERT_EQ(node_id_1, ClientID::FromBinary(data->node_id()));
|
||||
ASSERT_EQ(node_id_1, NodeID::FromBinary(data->node_id()));
|
||||
auto data2 = gcs_client_1->Nodes().Get(node_id_2);
|
||||
RAY_LOG(DEBUG) << "Server 2 NodeID=" << ClientID::FromBinary(data2->node_id());
|
||||
RAY_LOG(DEBUG) << "Server 2 NodeID=" << NodeID::FromBinary(data2->node_id());
|
||||
RAY_LOG(DEBUG) << "Server 2 NodeIp=" << data2->node_manager_address();
|
||||
RAY_LOG(DEBUG) << "Server 2 NodePort=" << data2->node_manager_port();
|
||||
ASSERT_EQ(node_id_2, ClientID::FromBinary(data2->node_id()));
|
||||
ASSERT_EQ(node_id_2, NodeID::FromBinary(data2->node_id()));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -49,8 +49,8 @@ ActorRegistration::ActorRegistration(const ActorTableData &actor_table_data,
|
||||
}
|
||||
}
|
||||
|
||||
const ClientID ActorRegistration::GetNodeManagerId() const {
|
||||
return ClientID::FromBinary(actor_table_data_.address().raylet_id());
|
||||
const NodeID ActorRegistration::GetNodeManagerId() const {
|
||||
return NodeID::FromBinary(actor_table_data_.address().raylet_id());
|
||||
}
|
||||
|
||||
const ObjectID ActorRegistration::GetActorCreationDependency() const {
|
||||
|
||||
@@ -77,7 +77,7 @@ class ActorRegistration {
|
||||
///
|
||||
/// \return The actor's node manager location. All tasks for the actor should
|
||||
/// be forwarded to this node.
|
||||
const ClientID GetNodeManagerId() const;
|
||||
const NodeID GetNodeManagerId() const;
|
||||
|
||||
/// Get the object that represents the actor's initial state. This is the
|
||||
/// execution dependency returned by this actor's creation task. If
|
||||
|
||||
@@ -33,7 +33,7 @@ typedef std::function<std::shared_ptr<boost::asio::deadline_timer>(std::function
|
||||
class AgentManager : public rpc::AgentManagerServiceHandler {
|
||||
public:
|
||||
struct Options {
|
||||
const ClientID node_id;
|
||||
const NodeID node_id;
|
||||
std::vector<std::string> agent_commands;
|
||||
};
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ table RegisterClientReply {
|
||||
success: bool;
|
||||
// The reason of registration failure.
|
||||
failure_reason: string;
|
||||
// GCS ClientID of the local node manager.
|
||||
// GCS NodeID of the local node manager.
|
||||
raylet_id: string;
|
||||
// Port that this worker should listen on.
|
||||
port: int;
|
||||
|
||||
@@ -117,9 +117,8 @@ std::string WorkerOwnerString(std::shared_ptr<WorkerInterface> &worker) {
|
||||
return buffer.str();
|
||||
}
|
||||
|
||||
NodeManager::NodeManager(boost::asio::io_service &io_service,
|
||||
const ClientID &self_node_id, const NodeManagerConfig &config,
|
||||
ObjectManager &object_manager,
|
||||
NodeManager::NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id,
|
||||
const NodeManagerConfig &config, ObjectManager &object_manager,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client,
|
||||
std::shared_ptr<ObjectDirectoryInterface> object_directory)
|
||||
: self_node_id_(self_node_id),
|
||||
@@ -194,7 +193,7 @@ NodeManager::NodeManager(boost::asio::io_service &io_service,
|
||||
return args_ready;
|
||||
};
|
||||
|
||||
auto get_node_info_func = [this](const ClientID &node_id) {
|
||||
auto get_node_info_func = [this](const NodeID &node_id) {
|
||||
return gcs_client_->Nodes().Get(node_id);
|
||||
};
|
||||
cluster_task_manager_ = std::shared_ptr<ClusterTaskManager>(
|
||||
@@ -231,7 +230,7 @@ ray::Status NodeManager::RegisterGcs() {
|
||||
RAY_RETURN_NOT_OK(
|
||||
gcs_client_->Actors().AsyncSubscribeAll(actor_notification_callback, nullptr));
|
||||
|
||||
auto on_node_change = [this](const ClientID &node_id, const GcsNodeInfo &data) {
|
||||
auto on_node_change = [this](const NodeID &node_id, const GcsNodeInfo &data) {
|
||||
if (data.state() == GcsNodeInfo::ALIVE) {
|
||||
NodeAdded(data);
|
||||
} else {
|
||||
@@ -247,7 +246,7 @@ ray::Status NodeManager::RegisterGcs() {
|
||||
// Subscribe to resource changes.
|
||||
const auto &resources_changed =
|
||||
[this](const rpc::NodeResourceChange &resource_notification) {
|
||||
auto id = ClientID::FromBinary(resource_notification.node_id());
|
||||
auto id = NodeID::FromBinary(resource_notification.node_id());
|
||||
if (resource_notification.updated_resources_size() != 0) {
|
||||
ResourceSet resource_set(
|
||||
MapFromProtobuf(resource_notification.updated_resources()));
|
||||
@@ -720,7 +719,7 @@ void NodeManager::GetObjectManagerProfileInfo() {
|
||||
}
|
||||
|
||||
void NodeManager::NodeAdded(const GcsNodeInfo &node_info) {
|
||||
const ClientID node_id = ClientID::FromBinary(node_info.node_id());
|
||||
const NodeID node_id = NodeID::FromBinary(node_info.node_id());
|
||||
|
||||
RAY_LOG(DEBUG) << "[NodeAdded] Received callback from client id " << node_id;
|
||||
if (1 == cluster_resource_map_.count(node_id)) {
|
||||
@@ -761,7 +760,7 @@ void NodeManager::NodeAdded(const GcsNodeInfo &node_info) {
|
||||
void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) {
|
||||
// TODO(swang): If we receive a notification for our own death, clean up and
|
||||
// exit immediately.
|
||||
const ClientID node_id = ClientID::FromBinary(node_info.node_id());
|
||||
const NodeID node_id = NodeID::FromBinary(node_info.node_id());
|
||||
RAY_LOG(DEBUG) << "[NodeRemoved] Received callback from client id " << node_id;
|
||||
|
||||
RAY_CHECK(node_id != self_node_id_)
|
||||
@@ -810,7 +809,7 @@ void NodeManager::NodeRemoved(const GcsNodeInfo &node_info) {
|
||||
|
||||
void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) {
|
||||
const WorkerID worker_id = WorkerID::FromBinary(address.worker_id());
|
||||
const ClientID node_id = ClientID::FromBinary(address.raylet_id());
|
||||
const NodeID node_id = NodeID::FromBinary(address.raylet_id());
|
||||
if (!worker_id.IsNil()) {
|
||||
RAY_LOG(DEBUG) << "Worker " << worker_id << " failed";
|
||||
failed_workers_cache_.insert(worker_id);
|
||||
@@ -850,7 +849,7 @@ void NodeManager::HandleUnexpectedWorkerFailure(const rpc::Address &address) {
|
||||
}
|
||||
}
|
||||
|
||||
void NodeManager::ResourceCreateUpdated(const ClientID &client_id,
|
||||
void NodeManager::ResourceCreateUpdated(const NodeID &client_id,
|
||||
const ResourceSet &createUpdatedResources) {
|
||||
RAY_LOG(DEBUG) << "[ResourceCreateUpdated] received callback from client id "
|
||||
<< client_id << " with created or updated resources: "
|
||||
@@ -882,7 +881,7 @@ void NodeManager::ResourceCreateUpdated(const ClientID &client_id,
|
||||
return;
|
||||
}
|
||||
|
||||
void NodeManager::ResourceDeleted(const ClientID &client_id,
|
||||
void NodeManager::ResourceDeleted(const NodeID &client_id,
|
||||
const std::vector<std::string> &resource_names) {
|
||||
if (RAY_LOG_ENABLED(DEBUG)) {
|
||||
std::ostringstream oss;
|
||||
@@ -929,7 +928,7 @@ void NodeManager::TryLocalInfeasibleTaskScheduling() {
|
||||
}
|
||||
}
|
||||
|
||||
void NodeManager::HeartbeatAdded(const ClientID &client_id,
|
||||
void NodeManager::HeartbeatAdded(const NodeID &client_id,
|
||||
const HeartbeatTableData &heartbeat_data) {
|
||||
// Locate the client id in remote client table and update available resources based on
|
||||
// the received heartbeat information.
|
||||
@@ -1009,7 +1008,7 @@ void NodeManager::HeartbeatAdded(const ClientID &client_id,
|
||||
void NodeManager::HeartbeatBatchAdded(const HeartbeatBatchTableData &heartbeat_batch) {
|
||||
// Update load information provided by each heartbeat.
|
||||
for (const auto &heartbeat_data : heartbeat_batch.batch()) {
|
||||
const ClientID &client_id = ClientID::FromBinary(heartbeat_data.client_id());
|
||||
const NodeID &client_id = NodeID::FromBinary(heartbeat_data.client_id());
|
||||
if (client_id == self_node_id_) {
|
||||
// Skip heartbeats from self.
|
||||
continue;
|
||||
@@ -1790,7 +1789,7 @@ void NodeManager::HandleRequestWorkerLease(const rpc::RequestWorkerLeaseRequest
|
||||
leased_workers_[worker_id] = worker;
|
||||
});
|
||||
task.OnSpillbackInstead(
|
||||
[reply, task_id, send_reply_callback](const ClientID &spillback_to,
|
||||
[reply, task_id, send_reply_callback](const NodeID &spillback_to,
|
||||
const std::string &address, int port) {
|
||||
RAY_LOG(DEBUG) << "Worker lease request SPILLBACK " << task_id;
|
||||
reply->mutable_retry_at_raylet_address()->set_ip_address(address);
|
||||
@@ -2012,7 +2011,7 @@ void NodeManager::ProcessSetResourceRequest(
|
||||
double const &capacity = message->capacity();
|
||||
bool is_deletion = capacity <= 0;
|
||||
|
||||
ClientID node_id = from_flatbuf<ClientID>(*message->client_id());
|
||||
NodeID node_id = from_flatbuf<NodeID>(*message->client_id());
|
||||
|
||||
// If the python arg was null, set node_id to the local node id.
|
||||
if (node_id.IsNil()) {
|
||||
@@ -2044,7 +2043,7 @@ void NodeManager::ProcessSetResourceRequest(
|
||||
}
|
||||
|
||||
bool NodeManager::PrepareBundle(
|
||||
std::unordered_map<ClientID, SchedulingResources> &resource_map,
|
||||
std::unordered_map<NodeID, SchedulingResources> &resource_map,
|
||||
const BundleSpecification &bundle_spec) {
|
||||
// TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent
|
||||
// once retry is implemented. If the resource map contains the local raylet, update load
|
||||
@@ -2080,7 +2079,7 @@ bool NodeManager::PrepareBundle(
|
||||
}
|
||||
|
||||
void NodeManager::CommitBundle(
|
||||
std::unordered_map<ClientID, SchedulingResources> &resource_map,
|
||||
std::unordered_map<NodeID, SchedulingResources> &resource_map,
|
||||
const BundleSpecification &bundle_spec) {
|
||||
// TODO(sang): It is currently not idempotent because we don't retry. Make it idempotent
|
||||
// once retry is implemented.
|
||||
@@ -2106,7 +2105,7 @@ void NodeManager::CommitBundle(
|
||||
}
|
||||
|
||||
void NodeManager::ScheduleTasks(
|
||||
std::unordered_map<ClientID, SchedulingResources> &resource_map) {
|
||||
std::unordered_map<NodeID, SchedulingResources> &resource_map) {
|
||||
// If the resource map contains the local raylet, update load before calling policy.
|
||||
if (resource_map.count(self_node_id_) > 0) {
|
||||
resource_map[self_node_id_].SetLoadResources(local_queues_.GetTotalResourceLoad());
|
||||
@@ -2118,17 +2117,17 @@ void NodeManager::ScheduleTasks(
|
||||
RAY_LOG(DEBUG) << "[NM ScheduleTasks] policy decision:";
|
||||
for (const auto &task_client_pair : policy_decision) {
|
||||
TaskID task_id = task_client_pair.first;
|
||||
ClientID node_id = task_client_pair.second;
|
||||
NodeID node_id = task_client_pair.second;
|
||||
RAY_LOG(DEBUG) << task_id << " --> " << node_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Extract decision for this raylet.
|
||||
std::unordered_set<TaskID> local_task_ids;
|
||||
// Iterate over (taskid, clientid) pairs, extract tasks assigned to the local node.
|
||||
// Iterate over (taskid, nodeid) pairs, extract tasks assigned to the local node.
|
||||
for (const auto &task_client_pair : policy_decision) {
|
||||
const TaskID &task_id = task_client_pair.first;
|
||||
const ClientID &node_id = task_client_pair.second;
|
||||
const NodeID &node_id = task_client_pair.second;
|
||||
if (node_id == self_node_id_) {
|
||||
local_task_ids.insert(task_id);
|
||||
} else {
|
||||
@@ -2300,7 +2299,7 @@ void NodeManager::TreatTaskAsFailedIfLost(const Task &task) {
|
||||
object_id, spec.CallerAddress(),
|
||||
[this, task_marked_as_failed, task](
|
||||
const ray::ObjectID &object_id,
|
||||
const std::unordered_set<ray::ClientID> &clients) {
|
||||
const std::unordered_set<ray::NodeID> &clients) {
|
||||
if (!*task_marked_as_failed) {
|
||||
// Only process the object locations if we haven't already marked the
|
||||
// task as failed.
|
||||
@@ -2604,7 +2603,7 @@ void NodeManager::AssignTask(const std::shared_ptr<WorkerInterface> &worker,
|
||||
worker->SetPlacementGroupId(spec.PlacementGroupId());
|
||||
|
||||
const auto owner_worker_id = WorkerID::FromBinary(spec.CallerAddress().worker_id());
|
||||
const auto owner_node_id = ClientID::FromBinary(spec.CallerAddress().raylet_id());
|
||||
const auto owner_node_id = NodeID::FromBinary(spec.CallerAddress().raylet_id());
|
||||
RAY_CHECK(!owner_worker_id.IsNil());
|
||||
RAY_LOG(DEBUG) << "Worker lease request DISPATCH " << task_id << " to worker "
|
||||
<< worker->WorkerId() << ", owner ID " << owner_worker_id;
|
||||
@@ -2935,8 +2934,7 @@ void NodeManager::HandleObjectMissing(const ObjectID &object_id) {
|
||||
}
|
||||
}
|
||||
|
||||
void NodeManager::ForwardTaskOrResubmit(const Task &task,
|
||||
const ClientID &node_manager_id) {
|
||||
void NodeManager::ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id) {
|
||||
// Attempt to forward the task.
|
||||
// TODO(sang): Modify method names.
|
||||
ForwardTask(task, node_manager_id,
|
||||
@@ -2957,7 +2955,7 @@ void NodeManager::ForwardTaskOrResubmit(const Task &task,
|
||||
}
|
||||
|
||||
void NodeManager::ForwardTask(
|
||||
const Task &task, const ClientID &node_id,
|
||||
const Task &task, const NodeID &node_id,
|
||||
const std::function<void(const ray::Status &, const Task &)> &on_error) {
|
||||
// This method spillbacks lease requests to other nodes.
|
||||
// TODO(sang): Modify method names.
|
||||
|
||||
@@ -127,7 +127,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
///
|
||||
/// \param resource_config The initial set of node resources.
|
||||
/// \param object_manager A reference to the local object manager.
|
||||
NodeManager(boost::asio::io_service &io_service, const ClientID &self_node_id,
|
||||
NodeManager(boost::asio::io_service &io_service, const NodeID &self_node_id,
|
||||
const NodeManagerConfig &config, ObjectManager &object_manager,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client,
|
||||
std::shared_ptr<ObjectDirectoryInterface> object_directory_);
|
||||
@@ -191,14 +191,14 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// \param client_id ID of the node that created or updated resources.
|
||||
/// \param createUpdatedResources Created or updated resources.
|
||||
/// \return Void.
|
||||
void ResourceCreateUpdated(const ClientID &client_id,
|
||||
void ResourceCreateUpdated(const NodeID &client_id,
|
||||
const ResourceSet &createUpdatedResources);
|
||||
|
||||
/// Handler for the deletion of a resource in the GCS
|
||||
/// \param client_id ID of the node that deleted resources.
|
||||
/// \param resource_names Names of deleted resources.
|
||||
/// \return Void.
|
||||
void ResourceDeleted(const ClientID &client_id,
|
||||
void ResourceDeleted(const NodeID &client_id,
|
||||
const std::vector<std::string> &resource_names);
|
||||
|
||||
/// Evaluates the local infeasible queue to check if any tasks can be scheduled.
|
||||
@@ -226,7 +226,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// \param id The ID of the node manager that sent the heartbeat.
|
||||
/// \param data The heartbeat data including load information.
|
||||
/// \return Void.
|
||||
void HeartbeatAdded(const ClientID &id, const HeartbeatTableData &data);
|
||||
void HeartbeatAdded(const NodeID &id, const HeartbeatTableData &data);
|
||||
/// Handler for a heartbeat batch notification from the GCS
|
||||
///
|
||||
/// \param heartbeat_batch The batch of heartbeat data.
|
||||
@@ -308,7 +308,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// consider the local node manager and the node managers in the keys of the
|
||||
/// resource_map argument.
|
||||
/// \return Void.
|
||||
void ScheduleTasks(std::unordered_map<ClientID, SchedulingResources> &resource_map);
|
||||
void ScheduleTasks(std::unordered_map<NodeID, SchedulingResources> &resource_map);
|
||||
|
||||
/// Make a placement decision for the resource_map and subtract original resources so
|
||||
/// that the node is ready to commit (create) placement group resources.
|
||||
@@ -319,7 +319,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// resource_map argument.
|
||||
/// \param bundle_spec Specification of bundle that will be prepared.
|
||||
/// \return True is resources were successfully prepared. False otherwise.
|
||||
bool PrepareBundle(std::unordered_map<ClientID, SchedulingResources> &resource_map,
|
||||
bool PrepareBundle(std::unordered_map<NodeID, SchedulingResources> &resource_map,
|
||||
const BundleSpecification &bundle_spec);
|
||||
|
||||
/// Make a placement decision for the resource_map.
|
||||
@@ -329,7 +329,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// consider the local node manager and the node managers in the keys of the
|
||||
/// resource_map argument.
|
||||
/// \param bundle_spec Specification of bundle that will be prepared.
|
||||
void CommitBundle(std::unordered_map<ClientID, SchedulingResources> &resource_map,
|
||||
void CommitBundle(std::unordered_map<NodeID, SchedulingResources> &resource_map,
|
||||
const BundleSpecification &bundle_spec);
|
||||
|
||||
/// Handle a task whose return value(s) must be reconstructed.
|
||||
@@ -346,7 +346,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// \param task The task in question.
|
||||
/// \param node_manager_id The ID of the remote node manager.
|
||||
/// \return Void.
|
||||
void ForwardTaskOrResubmit(const Task &task, const ClientID &node_manager_id);
|
||||
void ForwardTaskOrResubmit(const Task &task, const NodeID &node_manager_id);
|
||||
/// Forward a task to another node to execute. The task is assumed to not be
|
||||
/// queued in local_queues_.
|
||||
///
|
||||
@@ -354,7 +354,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// \param node_id The ID of the node to forward the task to.
|
||||
/// \param on_error Callback to run on non-ok status.
|
||||
void ForwardTask(
|
||||
const Task &task, const ClientID &node_id,
|
||||
const Task &task, const NodeID &node_id,
|
||||
const std::function<void(const ray::Status &, const Task &)> &on_error);
|
||||
|
||||
/// Dispatch locally scheduled tasks. This attempts the transition from "scheduled" to
|
||||
@@ -681,7 +681,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
bool IsActorCreationTask(const TaskID &task_id);
|
||||
|
||||
/// ID of this node.
|
||||
ClientID self_node_id_;
|
||||
NodeID self_node_id_;
|
||||
boost::asio::io_service &io_service_;
|
||||
ObjectManager &object_manager_;
|
||||
/// A Plasma object store client. This is used for creating new objects in
|
||||
@@ -733,7 +733,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
const NodeManagerConfig initial_config_;
|
||||
/// The resources (and specific resource IDs) that are currently available.
|
||||
ResourceIdSet local_available_resources_;
|
||||
std::unordered_map<ClientID, SchedulingResources> cluster_resource_map_;
|
||||
std::unordered_map<NodeID, SchedulingResources> cluster_resource_map_;
|
||||
|
||||
/// A pool of workers.
|
||||
WorkerPool worker_pool_;
|
||||
@@ -772,7 +772,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
rpc::ClientCallManager client_call_manager_;
|
||||
|
||||
/// Map from node ids to clients of the remote node managers.
|
||||
std::unordered_map<ClientID, std::unique_ptr<rpc::NodeManagerClient>>
|
||||
std::unordered_map<NodeID, std::unique_ptr<rpc::NodeManagerClient>>
|
||||
remote_node_manager_clients_;
|
||||
|
||||
/// Map of workers leased out to direct call clients.
|
||||
@@ -813,7 +813,7 @@ class NodeManager : public rpc::NodeManagerServiceHandler {
|
||||
/// Cache for the WorkerTable in the GCS.
|
||||
absl::flat_hash_set<WorkerID> failed_workers_cache_;
|
||||
/// Cache for the ClientTable in the GCS.
|
||||
absl::flat_hash_set<ClientID> failed_nodes_cache_;
|
||||
absl::flat_hash_set<NodeID> failed_nodes_cache_;
|
||||
|
||||
/// Concurrency for the following map
|
||||
mutable absl::Mutex plasma_object_notification_lock_;
|
||||
|
||||
@@ -128,14 +128,14 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
|
||||
|
||||
int num_connected_clients = 0;
|
||||
|
||||
ClientID node_id_1;
|
||||
ClientID node_id_2;
|
||||
NodeID node_id_1;
|
||||
NodeID node_id_2;
|
||||
|
||||
void WaitConnections() {
|
||||
node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
gcs_client_1->Nodes().AsyncSubscribeToNodeChange(
|
||||
[this](const ClientID &node_id, const rpc::GcsNodeInfo &data) {
|
||||
[this](const NodeID &node_id, const rpc::GcsNodeInfo &data) {
|
||||
if (node_id == node_id_1 || node_id == node_id_2) {
|
||||
num_connected_clients += 1;
|
||||
}
|
||||
@@ -196,8 +196,8 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
|
||||
RAY_LOG(INFO) << "\n"
|
||||
<< "Server client ids:"
|
||||
<< "\n";
|
||||
ClientID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
ClientID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
NodeID node_id_1 = gcs_client_1->Nodes().GetSelfId();
|
||||
NodeID node_id_2 = gcs_client_2->Nodes().GetSelfId();
|
||||
RAY_LOG(INFO) << "Server 1: " << node_id_1;
|
||||
RAY_LOG(INFO) << "Server 2: " << node_id_2;
|
||||
|
||||
@@ -205,13 +205,13 @@ class TestObjectManagerIntegration : public TestObjectManagerBase {
|
||||
<< "All connected clients:"
|
||||
<< "\n";
|
||||
auto data = gcs_client_2->Nodes().Get(node_id_1);
|
||||
RAY_LOG(INFO) << (ClientID::FromBinary(data->node_id()).IsNil());
|
||||
RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data->node_id());
|
||||
RAY_LOG(INFO) << (NodeID::FromBinary(data->node_id()).IsNil());
|
||||
RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data->node_id());
|
||||
RAY_LOG(INFO) << "ClientIp=" << data->node_manager_address();
|
||||
RAY_LOG(INFO) << "ClientPort=" << data->node_manager_port();
|
||||
rpc::GcsNodeInfo data2;
|
||||
gcs_client_1->Nodes().Get(node_id_2);
|
||||
RAY_LOG(INFO) << "ClientID=" << ClientID::FromBinary(data2->node_id());
|
||||
RAY_LOG(INFO) << "NodeID=" << NodeID::FromBinary(data2->node_id());
|
||||
RAY_LOG(INFO) << "ClientIp=" << data2->node_manager_address();
|
||||
RAY_LOG(INFO) << "ClientPort=" << data2->node_manager_port();
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ Raylet::Raylet(boost::asio::io_service &main_service, const std::string &socket_
|
||||
const NodeManagerConfig &node_manager_config,
|
||||
const ObjectManagerConfig &object_manager_config,
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client, int metrics_export_port)
|
||||
: self_node_id_(ClientID::FromRandom()),
|
||||
: self_node_id_(NodeID::FromRandom()),
|
||||
gcs_client_(gcs_client),
|
||||
object_directory_(
|
||||
RayConfig::instance().ownership_based_object_directory_enabled()
|
||||
|
||||
@@ -77,7 +77,7 @@ class Raylet {
|
||||
friend class TestObjectManagerIntegration;
|
||||
|
||||
/// ID of this node.
|
||||
ClientID self_node_id_;
|
||||
NodeID self_node_id_;
|
||||
/// Information of this node.
|
||||
GcsNodeInfo self_node_info_;
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user