mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 18:06:25 +08:00
[GCS] Delete redis gcs client and redis_xxx_accessor (#12996)
This commit is contained in:
-106
@@ -1020,7 +1020,6 @@ cc_test(
|
||||
cc_library(
|
||||
name = "gcs_test_util_lib",
|
||||
hdrs = [
|
||||
"src/ray/gcs/test/accessor_test_base.h",
|
||||
"src/ray/gcs/test/gcs_test_util.h",
|
||||
],
|
||||
copts = COPTS,
|
||||
@@ -1621,111 +1620,6 @@ cc_library(
|
||||
],
|
||||
)
|
||||
|
||||
# TODO(micafan) Support test group in future. Use test group we can run all gcs test once.
|
||||
cc_test(
|
||||
name = "redis_gcs_client_test",
|
||||
srcs = ["src/ray/gcs/test/redis_gcs_client_test.cc"],
|
||||
args = [
|
||||
"$(location redis-server)",
|
||||
"$(location redis-cli)",
|
||||
"$(location libray_redis_module.so)",
|
||||
],
|
||||
copts = COPTS,
|
||||
data = [
|
||||
"//:libray_redis_module.so",
|
||||
"//:redis-cli",
|
||||
"//:redis-server",
|
||||
],
|
||||
deps = [
|
||||
":gcs",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "redis_actor_info_accessor_test",
|
||||
srcs = ["src/ray/gcs/test/redis_actor_info_accessor_test.cc"],
|
||||
args = [
|
||||
"$(location redis-server)",
|
||||
"$(location redis-cli)",
|
||||
"$(location libray_redis_module.so)",
|
||||
],
|
||||
copts = COPTS,
|
||||
data = [
|
||||
"//:libray_redis_module.so",
|
||||
"//:redis-cli",
|
||||
"//:redis-server",
|
||||
],
|
||||
deps = [
|
||||
":gcs",
|
||||
":gcs_test_util_lib",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "redis_object_info_accessor_test",
|
||||
srcs = ["src/ray/gcs/test/redis_object_info_accessor_test.cc"],
|
||||
args = [
|
||||
"$(location redis-server)",
|
||||
"$(location redis-cli)",
|
||||
"$(location libray_redis_module.so)",
|
||||
],
|
||||
copts = COPTS,
|
||||
data = [
|
||||
"//:libray_redis_module.so",
|
||||
"//:redis-cli",
|
||||
"//:redis-server",
|
||||
],
|
||||
deps = [
|
||||
":gcs",
|
||||
":gcs_test_util_lib",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "redis_job_info_accessor_test",
|
||||
srcs = ["src/ray/gcs/test/redis_job_info_accessor_test.cc"],
|
||||
args = [
|
||||
"$(location redis-server)",
|
||||
"$(location redis-cli)",
|
||||
"$(location libray_redis_module.so)",
|
||||
],
|
||||
copts = COPTS,
|
||||
data = [
|
||||
"//:libray_redis_module.so",
|
||||
"//:redis-cli",
|
||||
"//:redis-server",
|
||||
],
|
||||
deps = [
|
||||
":gcs",
|
||||
":gcs_test_util_lib",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "redis_node_info_accessor_test",
|
||||
srcs = ["src/ray/gcs/test/redis_node_info_accessor_test.cc"],
|
||||
args = [
|
||||
"$(location redis-server)",
|
||||
"$(location redis-cli)",
|
||||
"$(location libray_redis_module.so)",
|
||||
],
|
||||
copts = COPTS,
|
||||
data = [
|
||||
"//:libray_redis_module.so",
|
||||
"//:redis-cli",
|
||||
"//:redis-server",
|
||||
],
|
||||
deps = [
|
||||
":gcs",
|
||||
":gcs_test_util_lib",
|
||||
"@com_google_googletest//:gtest_main",
|
||||
],
|
||||
)
|
||||
|
||||
cc_test(
|
||||
name = "asio_test",
|
||||
srcs = ["src/ray/gcs/test/asio_test.cc"],
|
||||
|
||||
@@ -120,7 +120,6 @@ test_core() {
|
||||
case "${OSTYPE}" in
|
||||
msys)
|
||||
args+=(
|
||||
-//:redis_gcs_client_test
|
||||
-//:core_worker_test
|
||||
-//:event_test
|
||||
-//:gcs_pub_sub_test
|
||||
|
||||
@@ -45,7 +45,7 @@ ray::rpc::ActorHandle CreateInnerActorHandleFromString(const std::string &serial
|
||||
}
|
||||
|
||||
ray::rpc::ActorHandle CreateInnerActorHandleFromActorTableData(
|
||||
const ray::gcs::ActorTableData &actor_table_data) {
|
||||
const ray::rpc::ActorTableData &actor_table_data) {
|
||||
ray::rpc::ActorHandle inner;
|
||||
inner.set_actor_id(actor_table_data.actor_id());
|
||||
inner.set_owner_id(actor_table_data.parent_id());
|
||||
@@ -80,7 +80,7 @@ ActorHandle::ActorHandle(
|
||||
ActorHandle::ActorHandle(const std::string &serialized)
|
||||
: ActorHandle(CreateInnerActorHandleFromString(serialized)) {}
|
||||
|
||||
ActorHandle::ActorHandle(const gcs::ActorTableData &actor_table_data)
|
||||
ActorHandle::ActorHandle(const rpc::ActorTableData &actor_table_data)
|
||||
: ActorHandle(CreateInnerActorHandleFromActorTableData(actor_table_data)) {}
|
||||
|
||||
void ActorHandle::SetActorTaskSpec(TaskSpecBuilder &builder, const ObjectID new_cursor) {
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
#include "ray/common/task/task_util.h"
|
||||
#include "ray/core_worker/common.h"
|
||||
#include "ray/core_worker/context.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "src/ray/protobuf/core_worker.pb.h"
|
||||
#include "src/ray/protobuf/gcs.pb.h"
|
||||
|
||||
@@ -42,7 +41,7 @@ class ActorHandle {
|
||||
ActorHandle(const std::string &serialized);
|
||||
|
||||
/// Constructs an ActorHandle from a gcs::ActorTableData message.
|
||||
ActorHandle(const gcs::ActorTableData &actor_table_data);
|
||||
ActorHandle(const rpc::ActorTableData &actor_table_data);
|
||||
|
||||
ActorID GetActorID() const { return ActorID::FromBinary(inner_.actor_id()); };
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "ray/core_worker/actor_manager.h"
|
||||
|
||||
#include "ray/gcs/pb_util.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -124,8 +123,8 @@ void ActorManager::WaitForActorOutOfScope(
|
||||
}
|
||||
|
||||
void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
|
||||
const gcs::ActorTableData &actor_data) {
|
||||
const auto &actor_state = gcs::ActorTableData::ActorState_Name(actor_data.state());
|
||||
const rpc::ActorTableData &actor_data) {
|
||||
const auto &actor_state = rpc::ActorTableData::ActorState_Name(actor_data.state());
|
||||
RAY_LOG(INFO) << "received notification on actor, state: " << actor_state
|
||||
<< ", actor_id: " << actor_id
|
||||
<< ", ip address: " << actor_data.address().ip_address()
|
||||
@@ -133,14 +132,14 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
|
||||
<< WorkerID::FromBinary(actor_data.address().worker_id())
|
||||
<< ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id())
|
||||
<< ", num_restarts: " << actor_data.num_restarts();
|
||||
if (actor_data.state() == gcs::ActorTableData::RESTARTING) {
|
||||
if (actor_data.state() == rpc::ActorTableData::RESTARTING) {
|
||||
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false);
|
||||
} else if (actor_data.state() == gcs::ActorTableData::DEAD) {
|
||||
} else if (actor_data.state() == rpc::ActorTableData::DEAD) {
|
||||
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), true);
|
||||
// We cannot erase the actor handle here because clients can still
|
||||
// submit tasks to dead actors. This also means we defer unsubscription,
|
||||
// otherwise we crash when bulk unsubscribing all actor handles.
|
||||
} else if (actor_data.state() == gcs::ActorTableData::ALIVE) {
|
||||
} else if (actor_data.state() == rpc::ActorTableData::ALIVE) {
|
||||
direct_actor_submitter_->ConnectActor(actor_id, actor_data.address(),
|
||||
actor_data.num_restarts());
|
||||
} else {
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include "ray/core_worker/actor_handle.h"
|
||||
#include "ray/core_worker/reference_count.h"
|
||||
#include "ray/core_worker/transport/direct_actor_transport.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -177,7 +177,7 @@ class ActorManager {
|
||||
/// \param[in] actor_id The actor id of this notification.
|
||||
/// \param[in] actor_data The GCS actor data.
|
||||
void HandleActorStateNotification(const ActorID &actor_id,
|
||||
const gcs::ActorTableData &actor_data);
|
||||
const rpc::ActorTableData &actor_data);
|
||||
|
||||
/// GCS client.
|
||||
std::shared_ptr<gcs::GcsClient> gcs_client_;
|
||||
|
||||
@@ -464,7 +464,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
|
||||
TaskID::ComputeDriverTaskId(worker_context_.GetWorkerID()),
|
||||
GetCallerId(), rpc_address_);
|
||||
|
||||
std::shared_ptr<gcs::TaskTableData> data = std::make_shared<gcs::TaskTableData>();
|
||||
std::shared_ptr<rpc::TaskTableData> data = std::make_shared<rpc::TaskTableData>();
|
||||
data->mutable_task()->mutable_task_spec()->CopyFrom(builder.Build().GetMessage());
|
||||
if (!options_.is_local_mode) {
|
||||
RAY_CHECK_OK(gcs_client_->Tasks().AsyncAdd(data, nullptr));
|
||||
@@ -1639,7 +1639,7 @@ std::pair<const ActorHandle *, Status> CoreWorker::GetNamedActorHandle(
|
||||
std::make_shared<std::promise<void>>(std::promise<void>());
|
||||
RAY_CHECK_OK(gcs_client_->Actors().AsyncGetByName(
|
||||
name, [this, &actor_id, name, ready_promise](
|
||||
Status status, const boost::optional<gcs::ActorTableData> &result) {
|
||||
Status status, const boost::optional<rpc::ActorTableData> &result) {
|
||||
if (status.ok() && result) {
|
||||
auto actor_handle = std::unique_ptr<ActorHandle>(new ActorHandle(*result));
|
||||
actor_id = actor_handle->GetActorID();
|
||||
|
||||
@@ -30,8 +30,7 @@
|
||||
#include "ray/core_worker/store_provider/plasma_store_provider.h"
|
||||
#include "ray/core_worker/transport/direct_actor_transport.h"
|
||||
#include "ray/core_worker/transport/direct_task_transport.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/subscription_executor.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/raylet_client/raylet_client.h"
|
||||
#include "ray/rpc/node_manager/node_manager_client.h"
|
||||
#include "ray/rpc/worker/core_worker_client.h"
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
#include "absl/synchronization/mutex.h"
|
||||
#include "absl/time/clock.h"
|
||||
#include "ray/core_worker/context.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
|
||||
@@ -20,17 +20,17 @@
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/core_worker/reference_count.h"
|
||||
#include "ray/core_worker/transport/direct_actor_transport.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client/service_based_accessor.h"
|
||||
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
using ::testing::_;
|
||||
|
||||
class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
|
||||
class MockActorInfoAccessor : public gcs::ServiceBasedActorInfoAccessor {
|
||||
public:
|
||||
MockActorInfoAccessor(gcs::RedisGcsClient *client)
|
||||
: gcs::RedisActorInfoAccessor(client) {}
|
||||
MockActorInfoAccessor(gcs::ServiceBasedGcsClient *client)
|
||||
: gcs::ServiceBasedActorInfoAccessor(client) {}
|
||||
|
||||
~MockActorInfoAccessor() {}
|
||||
|
||||
@@ -44,7 +44,7 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
|
||||
}
|
||||
|
||||
bool ActorStateNotificationPublished(const ActorID &actor_id,
|
||||
const gcs::ActorTableData &actor_data) {
|
||||
const rpc::ActorTableData &actor_data) {
|
||||
auto it = callback_map_.find(actor_id);
|
||||
if (it == callback_map_.end()) return false;
|
||||
auto actor_state_notification_callback = it->second;
|
||||
@@ -60,15 +60,13 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
|
||||
callback_map_;
|
||||
};
|
||||
|
||||
class MockGcsClient : public gcs::RedisGcsClient {
|
||||
class MockGcsClient : public gcs::ServiceBasedGcsClient {
|
||||
public:
|
||||
MockGcsClient(const gcs::GcsClientOptions &options) : gcs::RedisGcsClient(options) {}
|
||||
MockGcsClient(gcs::GcsClientOptions options) : gcs::ServiceBasedGcsClient(options) {}
|
||||
|
||||
void Init(MockActorInfoAccessor *actor_accesor_mock) {
|
||||
actor_accessor_.reset(actor_accesor_mock);
|
||||
void Init(MockActorInfoAccessor *actor_info_accessor) {
|
||||
actor_accessor_.reset(actor_info_accessor);
|
||||
}
|
||||
|
||||
~MockGcsClient() {}
|
||||
};
|
||||
|
||||
class MockDirectActorSubmitter : public CoreWorkerDirectActorTaskSubmitterInterface {
|
||||
|
||||
@@ -223,7 +223,6 @@ TEST_F(DirectActorSubmitterTest, TestActorDead) {
|
||||
addr.set_worker_id(worker_id.Binary());
|
||||
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
|
||||
submitter_.AddActorQueueIfNotExists(actor_id);
|
||||
gcs::ActorTableData actor_data;
|
||||
submitter_.ConnectActor(actor_id, addr, 0);
|
||||
ASSERT_EQ(worker_client_->callbacks.size(), 0);
|
||||
|
||||
@@ -256,7 +255,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartNoRetry) {
|
||||
addr.set_worker_id(worker_id.Binary());
|
||||
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
|
||||
submitter_.AddActorQueueIfNotExists(actor_id);
|
||||
gcs::ActorTableData actor_data;
|
||||
addr.set_port(0);
|
||||
submitter_.ConnectActor(actor_id, addr, 0);
|
||||
ASSERT_EQ(worker_client_->callbacks.size(), 0);
|
||||
@@ -299,7 +297,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartRetry) {
|
||||
addr.set_worker_id(worker_id.Binary());
|
||||
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
|
||||
submitter_.AddActorQueueIfNotExists(actor_id);
|
||||
gcs::ActorTableData actor_data;
|
||||
addr.set_port(0);
|
||||
submitter_.ConnectActor(actor_id, addr, 0);
|
||||
ASSERT_EQ(worker_client_->callbacks.size(), 0);
|
||||
@@ -351,7 +348,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderRetry) {
|
||||
addr.set_worker_id(worker_id.Binary());
|
||||
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
|
||||
submitter_.AddActorQueueIfNotExists(actor_id);
|
||||
gcs::ActorTableData actor_data;
|
||||
addr.set_port(0);
|
||||
submitter_.ConnectActor(actor_id, addr, 0);
|
||||
ASSERT_EQ(worker_client_->callbacks.size(), 0);
|
||||
@@ -401,7 +397,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderGcs) {
|
||||
addr.set_worker_id(worker_id.Binary());
|
||||
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
|
||||
submitter_.AddActorQueueIfNotExists(actor_id);
|
||||
gcs::ActorTableData actor_data;
|
||||
addr.set_port(0);
|
||||
submitter_.ConnectActor(actor_id, addr, 0);
|
||||
ASSERT_EQ(worker_client_->callbacks.size(), 0);
|
||||
|
||||
@@ -32,7 +32,6 @@
|
||||
#include "ray/core_worker/store_provider/memory_store/memory_store.h"
|
||||
#include "ray/core_worker/task_manager.h"
|
||||
#include "ray/core_worker/transport/dependency_resolver.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/grpc_server.h"
|
||||
#include "ray/rpc/worker/core_worker_client.h"
|
||||
|
||||
|
||||
@@ -225,7 +225,7 @@ std::vector<std::string> GlobalStateAccessor::GetAllWorkerInfo() {
|
||||
}
|
||||
|
||||
bool GlobalStateAccessor::AddWorkerInfo(const std::string &serialized_string) {
|
||||
auto data_ptr = std::make_shared<WorkerTableData>();
|
||||
auto data_ptr = std::make_shared<rpc::WorkerTableData>();
|
||||
data_ptr->ParseFromString(serialized_string);
|
||||
std::promise<bool> promise;
|
||||
RAY_CHECK_OK(
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
namespace ray {
|
||||
namespace gcs {
|
||||
|
||||
using namespace ray::rpc;
|
||||
|
||||
ServiceBasedJobInfoAccessor::ServiceBasedJobInfoAccessor(
|
||||
ServiceBasedGcsClient *client_impl)
|
||||
: client_impl_(client_impl) {}
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
#include "ray/common/task/task_spec.h"
|
||||
#include "ray/gcs/accessor.h"
|
||||
#include "ray/gcs/subscription_executor.h"
|
||||
#include "ray/util/sequencer.h"
|
||||
#include "src/ray/protobuf/gcs_service.pb.h"
|
||||
|
||||
@@ -38,12 +37,12 @@ class ServiceBasedJobInfoAccessor : public JobInfoAccessor {
|
||||
|
||||
virtual ~ServiceBasedJobInfoAccessor() = default;
|
||||
|
||||
Status AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
|
||||
Status AsyncAdd(const std::shared_ptr<rpc::JobTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeAll(const SubscribeCallback<JobID, JobTableData> &subscribe,
|
||||
Status AsyncSubscribeAll(const SubscribeCallback<JobID, rpc::JobTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::JobTableData> &callback) override;
|
||||
@@ -71,7 +70,7 @@ class ServiceBasedActorInfoAccessor : public ActorInfoAccessor {
|
||||
|
||||
virtual ~ServiceBasedActorInfoAccessor() = default;
|
||||
|
||||
Status GetAll(std::vector<ActorTableData> *actor_table_data_list) override;
|
||||
Status GetAll(std::vector<rpc::ActorTableData> *actor_table_data_list) override;
|
||||
|
||||
Status AsyncGet(const ActorID &actor_id,
|
||||
const OptionalItemCallback<rpc::ActorTableData> &callback) override;
|
||||
@@ -136,30 +135,30 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
|
||||
|
||||
virtual ~ServiceBasedNodeInfoAccessor() = default;
|
||||
|
||||
Status RegisterSelf(const GcsNodeInfo &local_node_info,
|
||||
Status RegisterSelf(const rpc::GcsNodeInfo &local_node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status UnregisterSelf() override;
|
||||
|
||||
const NodeID &GetSelfId() const override;
|
||||
|
||||
const GcsNodeInfo &GetSelfInfo() const override;
|
||||
const rpc::GcsNodeInfo &GetSelfInfo() const override;
|
||||
|
||||
Status AsyncRegister(const rpc::GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::GcsNodeInfo> &callback) override;
|
||||
|
||||
Status AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
boost::optional<GcsNodeInfo> Get(const NodeID &node_id,
|
||||
bool filter_dead_nodes = false) const override;
|
||||
boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id,
|
||||
bool filter_dead_nodes = false) const override;
|
||||
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
|
||||
const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const override;
|
||||
|
||||
bool IsRemoved(const NodeID &node_id) const override;
|
||||
|
||||
@@ -207,21 +206,21 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
|
||||
/// from a failure.
|
||||
rpc::ReportResourceUsageRequest cached_resource_usage_ GUARDED_BY(mutex_);
|
||||
|
||||
void HandleNotification(const GcsNodeInfo &node_info);
|
||||
void HandleNotification(const rpc::GcsNodeInfo &node_info);
|
||||
|
||||
ServiceBasedGcsClient *client_impl_;
|
||||
|
||||
using NodeChangeCallback =
|
||||
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
|
||||
std::function<void(const NodeID &id, const rpc::GcsNodeInfo &node_info)>;
|
||||
|
||||
GcsNodeInfo local_node_info_;
|
||||
rpc::GcsNodeInfo local_node_info_;
|
||||
NodeID local_node_id_;
|
||||
|
||||
/// The callback to call when a new node is added or a node is removed.
|
||||
NodeChangeCallback node_change_callback_{nullptr};
|
||||
|
||||
/// A cache for information about all nodes.
|
||||
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
|
||||
std::unordered_map<NodeID, rpc::GcsNodeInfo> node_cache_;
|
||||
/// The set of removed nodes.
|
||||
std::unordered_set<NodeID> removed_nodes_;
|
||||
};
|
||||
|
||||
@@ -37,21 +37,23 @@ Status ServiceBasedGcsClient::Connect(boost::asio::io_service &io_service) {
|
||||
return Status::Invalid("gcs service address is invalid!");
|
||||
}
|
||||
|
||||
// Connect to gcs.
|
||||
redis_gcs_client_.reset(new RedisGcsClient(options_));
|
||||
RAY_CHECK_OK(redis_gcs_client_->Connect(io_service));
|
||||
// Connect to redis.
|
||||
RedisClientOptions redis_client_options(options_.server_ip_, options_.server_port_,
|
||||
options_.password_, options_.is_test_client_);
|
||||
redis_client_.reset(new RedisClient(redis_client_options));
|
||||
RAY_CHECK_OK(redis_client_->Connect(io_service));
|
||||
|
||||
// Init gcs pub sub instance.
|
||||
gcs_pub_sub_.reset(new GcsPubSub(redis_gcs_client_->GetRedisClient()));
|
||||
gcs_pub_sub_.reset(new GcsPubSub(redis_client_));
|
||||
|
||||
// Get gcs service address.
|
||||
get_server_address_func_ = [this](std::pair<std::string, int> *address) {
|
||||
return GetGcsServerAddressFromRedis(
|
||||
redis_gcs_client_->primary_context()->sync_context(), address);
|
||||
redis_client_->GetPrimaryContext()->sync_context(), address);
|
||||
};
|
||||
std::pair<std::string, int> address;
|
||||
RAY_CHECK(GetGcsServerAddressFromRedis(
|
||||
redis_gcs_client_->primary_context()->sync_context(), &address,
|
||||
redis_client_->GetPrimaryContext()->sync_context(), &address,
|
||||
RayConfig::instance().gcs_service_connect_retries()))
|
||||
<< "Failed to get gcs server address when init gcs client.";
|
||||
|
||||
@@ -96,8 +98,8 @@ void ServiceBasedGcsClient::Disconnect() {
|
||||
is_connected_ = false;
|
||||
detect_timer_->cancel();
|
||||
gcs_pub_sub_.reset();
|
||||
redis_gcs_client_->Disconnect();
|
||||
redis_gcs_client_.reset();
|
||||
redis_client_->Disconnect();
|
||||
redis_client_.reset();
|
||||
RAY_LOG(DEBUG) << "ServiceBasedGcsClient Disconnected.";
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/redis_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_client.h"
|
||||
|
||||
namespace ray {
|
||||
@@ -31,8 +32,6 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient {
|
||||
|
||||
GcsPubSub &GetGcsPubSub() { return *gcs_pub_sub_; }
|
||||
|
||||
RedisGcsClient &GetRedisGcsClient() { return *redis_gcs_client_; }
|
||||
|
||||
rpc::GcsRpcClient &GetGcsRpcClient() { return *gcs_rpc_client_; }
|
||||
|
||||
private:
|
||||
@@ -59,7 +58,7 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient {
|
||||
/// Reconnect to GCS RPC server.
|
||||
void ReconnectGcsServer();
|
||||
|
||||
std::unique_ptr<RedisGcsClient> redis_gcs_client_;
|
||||
std::shared_ptr<RedisClient> redis_client_;
|
||||
|
||||
std::unique_ptr<GcsPubSub> gcs_pub_sub_;
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "ray/gcs/gcs_server/gcs_init_data.h"
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
#include "ray/rpc/worker/core_worker_client.h"
|
||||
#include "src/ray/protobuf/gcs_service.pb.h"
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "ray/gcs/gcs_server/gcs_object_manager.h"
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "ray/gcs/gcs_server/gcs_node_manager.h"
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
|
||||
@@ -43,23 +43,22 @@ GcsServer::~GcsServer() { Stop(); }
|
||||
|
||||
void GcsServer::Start() {
|
||||
// Init backend client.
|
||||
GcsClientOptions options(config_.redis_address, config_.redis_port,
|
||||
config_.redis_password, config_.is_test);
|
||||
redis_gcs_client_ = std::make_shared<RedisGcsClient>(options);
|
||||
auto status = redis_gcs_client_->Connect(main_service_);
|
||||
RedisClientOptions redis_client_options(config_.redis_address, config_.redis_port,
|
||||
config_.redis_password, config_.is_test);
|
||||
redis_client_ = std::make_shared<RedisClient>(redis_client_options);
|
||||
auto status = redis_client_->Connect(main_service_);
|
||||
RAY_CHECK(status.ok()) << "Failed to init redis gcs client as " << status;
|
||||
|
||||
// Init redis failure detector.
|
||||
gcs_redis_failure_detector_ = std::make_shared<GcsRedisFailureDetector>(
|
||||
main_service_, redis_gcs_client_->primary_context(), [this]() { Stop(); });
|
||||
main_service_, redis_client_->GetPrimaryContext(), [this]() { Stop(); });
|
||||
gcs_redis_failure_detector_->Start();
|
||||
|
||||
// Init gcs pub sub instance.
|
||||
gcs_pub_sub_ = std::make_shared<gcs::GcsPubSub>(redis_gcs_client_->GetRedisClient());
|
||||
gcs_pub_sub_ = std::make_shared<gcs::GcsPubSub>(redis_client_);
|
||||
|
||||
// Init gcs table storage.
|
||||
gcs_table_storage_ =
|
||||
std::make_shared<gcs::RedisGcsTableStorage>(redis_gcs_client_->GetRedisClient());
|
||||
gcs_table_storage_ = std::make_shared<gcs::RedisGcsTableStorage>(redis_client_);
|
||||
|
||||
// Load gcs tables data asynchronously.
|
||||
auto gcs_init_data = std::make_shared<GcsInitData>(gcs_table_storage_);
|
||||
@@ -132,7 +131,7 @@ void GcsServer::Stop() {
|
||||
}
|
||||
|
||||
void GcsServer::InitGcsNodeManager(const GcsInitData &gcs_init_data) {
|
||||
RAY_CHECK(redis_gcs_client_ && gcs_table_storage_ && gcs_pub_sub_);
|
||||
RAY_CHECK(redis_client_ && gcs_table_storage_ && gcs_pub_sub_);
|
||||
gcs_node_manager_ =
|
||||
std::make_shared<GcsNodeManager>(main_service_, gcs_pub_sub_, gcs_table_storage_);
|
||||
// Initialize by gcs tables data.
|
||||
@@ -255,7 +254,7 @@ void GcsServer::StoreGcsServerAddressInRedis() {
|
||||
std::string address = ip + ":" + std::to_string(GetPort());
|
||||
RAY_LOG(INFO) << "Gcs server address = " << address;
|
||||
|
||||
RAY_CHECK_OK(redis_gcs_client_->primary_context()->RunArgvAsync(
|
||||
RAY_CHECK_OK(redis_client_->GetPrimaryContext()->RunArgvAsync(
|
||||
{"SET", "GcsServerAddress", address}));
|
||||
RAY_LOG(INFO) << "Finished setting gcs server address: " << address;
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include "ray/gcs/gcs_server/gcs_resource_manager.h"
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/redis_client.h"
|
||||
#include "ray/rpc/client_call.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
#include "ray/rpc/node_manager/node_manager_client_pool.h"
|
||||
@@ -176,7 +176,7 @@ class GcsServer {
|
||||
/// Placement Group info handler and service
|
||||
std::unique_ptr<rpc::PlacementGroupInfoGrpcService> placement_group_info_service_;
|
||||
/// Backend client
|
||||
std::shared_ptr<RedisGcsClient> redis_gcs_client_;
|
||||
std::shared_ptr<RedisClient> redis_client_;
|
||||
/// A publisher for publishing gcs messages.
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
|
||||
/// The gcs table storage.
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
#include "ray/common/ray_config.h"
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
#include "ray/gcs/gcs_server/gcs_table_storage.h"
|
||||
#include "ray/gcs/pubsub/gcs_pub_sub.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -86,7 +86,6 @@ class GcsObjectManagerTest : public ::testing::Test {
|
||||
boost::asio::io_service io_service_;
|
||||
std::shared_ptr<gcs::GcsResourceManager> gcs_resource_manager_;
|
||||
std::shared_ptr<gcs::GcsNodeManager> gcs_node_manager_;
|
||||
std::shared_ptr<gcs::RedisGcsClient> gcs_client_;
|
||||
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
|
||||
std::shared_ptr<MockedGcsObjectManager> gcs_object_manager_;
|
||||
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
|
||||
|
||||
@@ -1,697 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
|
||||
#include <boost/none.hpp>
|
||||
|
||||
#include "ray/gcs/pb_util.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/util/logging.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
RedisLogBasedActorInfoAccessor::RedisLogBasedActorInfoAccessor(
|
||||
RedisGcsClient *client_impl)
|
||||
: client_impl_(client_impl),
|
||||
log_based_actor_sub_executor_(client_impl_->log_based_actor_table()) {}
|
||||
|
||||
std::vector<ActorID> RedisLogBasedActorInfoAccessor::GetAllActorID() const {
|
||||
return client_impl_->log_based_actor_table().GetAllActorID();
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::Get(const ActorID &actor_id,
|
||||
ActorTableData *actor_table_data) const {
|
||||
return client_impl_->log_based_actor_table().Get(actor_id, actor_table_data);
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::GetAll(
|
||||
std::vector<ActorTableData> *actor_table_data_list) {
|
||||
RAY_CHECK(actor_table_data_list);
|
||||
auto actor_id_list = GetAllActorID();
|
||||
actor_table_data_list->resize(actor_id_list.size());
|
||||
for (size_t i = 0; i < actor_id_list.size(); ++i) {
|
||||
RAY_CHECK_OK(Get(actor_id_list[i], &(*actor_table_data_list)[i]));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::AsyncGet(
|
||||
const ActorID &actor_id, const OptionalItemCallback<ActorTableData> &callback) {
|
||||
RAY_CHECK(callback != nullptr);
|
||||
auto on_done = [callback](RedisGcsClient *client, const ActorID &actor_id,
|
||||
const std::vector<ActorTableData> &data) {
|
||||
boost::optional<ActorTableData> result;
|
||||
if (!data.empty()) {
|
||||
result = data.back();
|
||||
}
|
||||
callback(Status::OK(), result);
|
||||
};
|
||||
|
||||
return client_impl_->log_based_actor_table().Lookup(actor_id.JobId(), actor_id,
|
||||
on_done);
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::AsyncRegisterActor(
|
||||
const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) {
|
||||
const std::string error_msg =
|
||||
"Unsupported method of AsyncRegisterActor in RedisLogBasedActorInfoAccessor.";
|
||||
RAY_LOG(FATAL) << error_msg;
|
||||
return Status::Invalid(error_msg);
|
||||
}
|
||||
|
||||
Status RedisLogBasedActorInfoAccessor::AsyncCreateActor(
|
||||
const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) {
|
||||
const std::string error_msg =
|
||||
"Unsupported method of AsyncCreateActor in "
|
||||
"RedisLogBasedActorInfoAccessor.";
|
||||
RAY_LOG(FATAL) << error_msg;
|
||||
return Status::Invalid(error_msg);
|
||||
}
|
||||
|
||||
// Subscribes to updates of all actors through the log-based subscription
// executor, using NodeID::Nil() as the subscriber id.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = log_based_actor_sub_executor_;
  return executor.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
|
||||
|
||||
// Subscribes to updates of a single actor through the log-based subscription
// executor, identifying this client by `subscribe_id_`.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisLogBasedActorInfoAccessor::AsyncSubscribe(
    const ActorID &actor_id, const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = log_based_actor_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, actor_id, subscribe, done);
}
|
||||
|
||||
// Cancels this client's subscription to `actor_id` on the log-based executor.
// No completion callback is supplied.
Status RedisLogBasedActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) {
  auto &executor = log_based_actor_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, actor_id, nullptr);
}
|
||||
|
||||
// Builds the accessor on top of the log-based base class and binds the
// subscription executor to the client's actor table. The base class is
// constructed first, so `client_impl_` is already set when it is read below.
RedisActorInfoAccessor::RedisActorInfoAccessor(RedisGcsClient *client_impl)
    : RedisLogBasedActorInfoAccessor(client_impl),
      actor_sub_executor_(client_impl_->actor_table()) {
}
|
||||
|
||||
std::vector<ActorID> RedisActorInfoAccessor::GetAllActorID() const {
|
||||
return client_impl_->actor_table().GetAllActorID();
|
||||
}
|
||||
|
||||
// Synchronously reads one actor's data from the client's actor table into
// `actor_table_data` and returns the table's status.
Status RedisActorInfoAccessor::Get(const ActorID &actor_id,
                                   ActorTableData *actor_table_data) const {
  auto &table = client_impl_->actor_table();
  return table.Get(actor_id, actor_table_data);
}
|
||||
|
||||
// Asynchronously looks up one actor in the actor table.
// On success `callback` receives Status::OK() and the actor data; on failure
// it receives an Invalid status and an empty optional.
Status RedisActorInfoAccessor::AsyncGet(
    const ActorID &actor_id, const OptionalItemCallback<ActorTableData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto found = [callback](RedisGcsClient * /*client*/, const ActorID & /*id*/,
                          const ActorTableData &entry) {
    callback(Status::OK(), entry);
  };

  auto missing = [callback](RedisGcsClient * /*client*/, const ActorID & /*id*/) {
    if (callback == nullptr) {
      return;
    }
    callback(Status::Invalid("Get actor failed."), boost::none);
  };

  return client_impl_->actor_table().Lookup(JobID::Nil(), actor_id, found, missing);
}
|
||||
|
||||
// Asynchronously fetches the data of every actor known to the actor table.
//
// One AsyncGet is issued per actor id. Each completion bumps a shared counter
// and, if data was returned, appends it to a shared result vector; when the
// counter reaches the number of ids, `callback` is invoked once with
// Status::OK() and the collected entries. Lookups that return no data are
// counted but contribute nothing to the result.
//
// \param callback Must be non-null. Called immediately with an empty vector
// when there are no actors.
// \return Status::OK(); a failing AsyncGet trips RAY_CHECK_OK.
Status RedisActorInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::ActorTableData> &callback) {
  RAY_CHECK(callback != nullptr);
  const auto actor_id_list = GetAllActorID();
  if (actor_id_list.empty()) {
    callback(Status::OK(), std::vector<rpc::ActorTableData>());
    return Status::OK();
  }

  // Keep the count in size_t: the original narrowed vector::size() to int.
  const size_t size = actor_id_list.size();
  auto finished_count = std::make_shared<size_t>(0);
  auto result = std::make_shared<std::vector<ActorTableData>>();
  result->reserve(size);
  for (const auto &actor_id : actor_id_list) {
    auto on_done = [finished_count, size, result, callback](
                       const Status &status,
                       const boost::optional<ActorTableData> &data) {
      ++(*finished_count);
      if (data) {
        result->push_back(*data);
      }
      if (*finished_count == size) {
        callback(Status::OK(), *result);
      }
    };
    RAY_CHECK_OK(AsyncGet(actor_id, on_done));
  }

  return Status::OK();
}
|
||||
|
||||
// Subscribes to updates of all actors through the actor-table subscription
// executor, using NodeID::Nil() as the subscriber id.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisActorInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = actor_sub_executor_;
  return executor.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
|
||||
|
||||
// Subscribes to updates of a single actor through the actor-table
// subscription executor, identifying this client by `subscribe_id_`.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisActorInfoAccessor::AsyncSubscribe(
    const ActorID &actor_id, const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = actor_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, actor_id, subscribe, done);
}
|
||||
|
||||
// Cancels this client's subscription to `actor_id` on the actor-table
// executor. No completion callback is supplied.
Status RedisActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) {
  auto &executor = actor_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, actor_id, nullptr);
}
|
||||
|
||||
// Stores the client pointer and binds the job subscription executor to the
// client's job table.
RedisJobInfoAccessor::RedisJobInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      job_sub_executor_(client_impl->job_table()) {
}
|
||||
|
||||
// Adds a job by appending its record to the job table (see DoAsyncAppend).
Status RedisJobInfoAccessor::AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
                                      const StatusCallback &callback) {
  Status append_status = DoAsyncAppend(data_ptr, callback);
  return append_status;
}
|
||||
|
||||
// Marks a job as finished by appending a record with is_dead = true, the
// current wall-clock time, an empty driver address and a driver pid of -1.
Status RedisJobInfoAccessor::AsyncMarkFinished(const JobID &job_id,
                                               const StatusCallback &callback) {
  const auto finished_record =
      CreateJobTableData(job_id, /*is_dead*/ true, /*time_stamp*/ std::time(nullptr),
                         /*driver_ip_address*/ "", /*driver_pid*/ -1);
  return DoAsyncAppend(finished_record, callback);
}
|
||||
|
||||
// Appends a job record to the job table, keyed by the job id stored in the
// record. If `callback` is non-null it is invoked with Status::OK() once the
// write callback fires; otherwise no write callback is installed.
Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr<JobTableData> &data_ptr,
                                           const StatusCallback &callback) {
  JobTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const JobID & /*id*/,
                            const JobTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  const JobID id = JobID::FromBinary(data_ptr->job_id());
  return client_impl_->job_table().Append(id, id, data_ptr, write_done);
}
|
||||
|
||||
// Subscribes to updates of all jobs through the job subscription executor,
// using NodeID::Nil() as the subscriber id. `subscribe` must be non-null.
Status RedisJobInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<JobID, JobTableData> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = job_sub_executor_;
  return executor.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
|
||||
|
||||
// Stores the client pointer and binds the two subscription executors to the
// client's raylet task table and task lease table respectively.
RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      task_sub_executor_(client_impl->raylet_task_table()),
      task_lease_sub_executor_(client_impl->task_lease_table()) {
}
|
||||
|
||||
// Writes a task record to the raylet task table, keyed by the task id found
// in the record's task spec. If `callback` is non-null it is invoked with
// Status::OK() when the write callback fires.
Status RedisTaskInfoAccessor::AsyncAdd(const std::shared_ptr<TaskTableData> &data_ptr,
                                       const StatusCallback &callback) {
  raylet::TaskTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                            const TaskTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  const TaskID id = TaskID::FromBinary(data_ptr->task().task_spec().task_id());
  raylet::TaskTable &table = client_impl_->raylet_task_table();
  return table.Add(id.JobId(), id, data_ptr, write_done);
}
|
||||
|
||||
// Asynchronously looks up a task in the raylet task table.
// On success `callback` receives Status::OK() and the task data; on failure
// it receives an Invalid status and an empty optional.
Status RedisTaskInfoAccessor::AsyncGet(
    const TaskID &task_id, const OptionalItemCallback<TaskTableData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto found = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                          const TaskTableData &entry) {
    callback(Status::OK(), boost::optional<TaskTableData>(entry));
  };

  auto missing = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/) {
    callback(Status::Invalid("Task not exist."), boost::optional<TaskTableData>());
  };

  raylet::TaskTable &table = client_impl_->raylet_task_table();
  return table.Lookup(task_id.JobId(), task_id, found, missing);
}
|
||||
|
||||
// Subscribes to updates of a single task through the task subscription
// executor, identifying this client by `subscribe_id_`.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisTaskInfoAccessor::AsyncSubscribe(
    const TaskID &task_id, const SubscribeCallback<TaskID, TaskTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = task_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, task_id, subscribe, done);
}
|
||||
|
||||
// Cancels this client's subscription to `task_id` on the task executor.
// No completion callback is supplied.
Status RedisTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) {
  auto &executor = task_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, task_id, nullptr);
}
|
||||
|
||||
// Writes a task lease record to the task lease table, keyed by the task id
// stored in the record. If `callback` is non-null it is invoked with
// Status::OK() when the write callback fires.
Status RedisTaskInfoAccessor::AsyncAddTaskLease(
    const std::shared_ptr<TaskLeaseData> &data_ptr, const StatusCallback &callback) {
  TaskLeaseTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                            const TaskLeaseData & /*written*/) {
      callback(Status::OK());
    };
  }

  const TaskID id = TaskID::FromBinary(data_ptr->task_id());
  TaskLeaseTable &table = client_impl_->task_lease_table();
  return table.Add(id.JobId(), id, data_ptr, write_done);
}
|
||||
|
||||
// Asynchronously looks up a task lease in the task lease table.
// On success `callback` receives Status::OK() and the lease data; on failure
// it receives an Invalid status and an empty optional.
Status RedisTaskInfoAccessor::AsyncGetTaskLease(
    const TaskID &task_id, const OptionalItemCallback<TaskLeaseData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto found = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                          const TaskLeaseData &lease) {
    callback(Status::OK(), boost::optional<TaskLeaseData>(lease));
  };

  auto missing = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/) {
    callback(Status::Invalid("Task lease not exist."),
             boost::optional<TaskLeaseData>());
  };

  TaskLeaseTable &table = client_impl_->task_lease_table();
  return table.Lookup(task_id.JobId(), task_id, found, missing);
}
|
||||
|
||||
// Subscribes to lease updates of a single task through the task lease
// subscription executor, identifying this client by `subscribe_id_`.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisTaskInfoAccessor::AsyncSubscribeTaskLease(
    const TaskID &task_id,
    const SubscribeCallback<TaskID, boost::optional<TaskLeaseData>> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = task_lease_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, task_id, subscribe, done);
}
|
||||
|
||||
// Cancels this client's lease subscription for `task_id`.
// No completion callback is supplied.
Status RedisTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &task_id) {
  auto &executor = task_lease_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, task_id, nullptr);
}
|
||||
|
||||
// Appends a reconstruction record to the task reconstruction log at the
// index given by the record's `num_reconstructions`.
// If `callback` is non-null, it is invoked with Status::OK() from the
// success callback and with an Invalid status from the failure callback;
// otherwise both log callbacks are left null.
Status RedisTaskInfoAccessor::AttemptTaskReconstruction(
    const std::shared_ptr<TaskReconstructionData> &data_ptr,
    const StatusCallback &callback) {
  TaskReconstructionLog::WriteCallback appended = nullptr;
  TaskReconstructionLog::WriteCallback rejected = nullptr;
  if (callback != nullptr) {
    appended = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                          const TaskReconstructionData & /*entry*/) {
      callback(Status::OK());
    };
    rejected = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                          const TaskReconstructionData & /*entry*/) {
      callback(Status::Invalid("Updating task reconstruction failed."));
    };
  }

  const TaskID id = TaskID::FromBinary(data_ptr->task_id());
  const int attempt_index = data_ptr->num_reconstructions();
  TaskReconstructionLog &log = client_impl_->task_reconstruction_log();
  return log.AppendAt(id.JobId(), id, data_ptr, appended, rejected, attempt_index);
}
|
||||
|
||||
// Stores the client pointer and binds the object subscription executor to the
// client's object table.
RedisObjectInfoAccessor::RedisObjectInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      object_sub_executor_(client_impl->object_table()) {
}
|
||||
|
||||
// Asynchronously looks up the locations of an object in the object table.
// The returned entries are copied into an rpc::ObjectLocationInfo tagged with
// the object id and handed to `callback` with Status::OK().
Status RedisObjectInfoAccessor::AsyncGetLocations(
    const ObjectID &object_id,
    const OptionalItemCallback<rpc::ObjectLocationInfo> &callback) {
  RAY_CHECK(callback != nullptr);

  auto collect = [callback](RedisGcsClient * /*client*/, const ObjectID &object_id,
                            const std::vector<ObjectTableData> &entries) {
    rpc::ObjectLocationInfo location_info;
    location_info.set_object_id(object_id.Binary());
    for (const auto &entry : entries) {
      location_info.add_locations()->CopyFrom(entry);
    }
    callback(Status::OK(), location_info);
  };

  ObjectTable &table = client_impl_->object_table();
  return table.Lookup(object_id.TaskId().JobId(), object_id, collect);
}
|
||||
|
||||
// Records that `node_id` holds `object_id` by adding an ObjectTableData entry
// (manager = node id) to the object table. If `callback` is non-null it is
// invoked with Status::OK() when the write callback fires.
Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
                                                 const NodeID &node_id,
                                                 const StatusCallback &callback) {
  std::function<void(RedisGcsClient * client, const ObjectID &id,
                     const ObjectTableData &data)>
      write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const ObjectID & /*id*/,
                            const ObjectTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  auto location = std::make_shared<ObjectTableData>();
  location->set_manager(node_id.Binary());

  ObjectTable &table = client_impl_->object_table();
  return table.Add(object_id.TaskId().JobId(), object_id, location, write_done);
}
|
||||
|
||||
// Removes the entry saying `node_id` holds `object_id` from the object table.
// If `callback` is non-null it is invoked with Status::OK() when the write
// callback fires.
Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id,
                                                    const NodeID &node_id,
                                                    const StatusCallback &callback) {
  std::function<void(RedisGcsClient * client, const ObjectID &id,
                     const ObjectTableData &data)>
      write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const ObjectID & /*id*/,
                            const ObjectTableData & /*removed*/) {
      callback(Status::OK());
    };
  }

  auto location = std::make_shared<ObjectTableData>();
  location->set_manager(node_id.Binary());

  ObjectTable &table = client_impl_->object_table();
  return table.Remove(object_id.TaskId().JobId(), object_id, location, write_done);
}
|
||||
|
||||
// Subscribes to location changes of a single object.
//
// Each notification from the object subscription executor is translated into
// a vector of rpc::ObjectLocationChange: one element per entry in the
// notification, carrying the entry's manager as node id and the
// notification's IsAdded() flag as `is_add`. The vector is then passed to
// `subscribe` together with the object id.
//
// \param object_id Object whose locations are watched.
// \param subscribe Must be non-null; receives the translated changes.
// \param done Forwarded unchanged to the executor.
Status RedisObjectInfoAccessor::AsyncSubscribeToLocations(
    const ObjectID &object_id,
    const SubscribeCallback<ObjectID, std::vector<rpc::ObjectLocationChange>> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  return object_sub_executor_.AsyncSubscribe(
      subscribe_id_, object_id,
      // The parameter below was garbled to "¬ification_data" (HTML "&not"
      // entity); it is a const reference named notification_data.
      [subscribe](const ObjectID &id, const ObjectChangeNotification &notification_data) {
        std::vector<rpc::ObjectLocationChange> updates;
        for (const auto &item : notification_data.GetData()) {
          rpc::ObjectLocationChange update;
          update.set_is_add(notification_data.IsAdded());
          update.set_node_id(item.manager());
          updates.push_back(update);
        }
        subscribe(id, updates);
      },
      done);
}
|
||||
|
||||
// Cancels this client's location subscription for `object_id`.
// No completion callback is supplied.
Status RedisObjectInfoAccessor::AsyncUnsubscribeToLocations(const ObjectID &object_id) {
  auto &executor = object_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, object_id, nullptr);
}
|
||||
|
||||
// Stores the client pointer and binds the batched resource-usage subscription
// executor to the client's resource usage batch table.
RedisNodeInfoAccessor::RedisNodeInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      resource_usage_batch_sub_executor_(client_impl->resource_usage_batch_table()) {
}
|
||||
|
||||
// Registers the local node by connecting it to the node table.
//
// \param local_node_info Description of the local node passed to Connect().
// \param callback Optional. When non-null it is invoked synchronously with
// the status returned by Connect(). Previously it always received
// Status::OK(), which hid connection failures from callback-only callers.
// \return The status returned by NodeTable::Connect().
Status RedisNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info,
                                           const StatusCallback &callback) {
  NodeTable &node_table = client_impl_->node_table();
  Status status = node_table.Connect(local_node_info);
  if (callback != nullptr) {
    callback(status);
  }
  return status;
}
|
||||
|
||||
// Disconnects the local node from the node table and returns the table's
// status.
Status RedisNodeInfoAccessor::UnregisterSelf() {
  return client_impl_->node_table().Disconnect();
}
|
||||
|
||||
// Returns the local node id as held by the node table.
const NodeID &RedisNodeInfoAccessor::GetSelfId() const {
  return client_impl_->node_table().GetLocalNodeId();
}
|
||||
|
||||
// Returns the local node's info as held by the node table.
const GcsNodeInfo &RedisNodeInfoAccessor::GetSelfInfo() const {
  return client_impl_->node_table().GetLocalNode();
}
|
||||
|
||||
// Marks `node_info` as connected in the node table. If `callback` is non-null
// it is invoked with Status::OK() when the write callback fires.
Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info,
                                            const StatusCallback &callback) {
  NodeTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                            const GcsNodeInfo & /*written*/) {
      callback(Status::OK());
    };
  }
  return client_impl_->node_table().MarkConnected(node_info, write_done);
}
|
||||
|
||||
// Marks `node_id` as disconnected in the node table. If `callback` is
// non-null it is invoked with Status::OK() when the write callback fires.
Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
                                              const StatusCallback &callback) {
  NodeTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                            const GcsNodeInfo & /*written*/) {
      callback(Status::OK());
    };
  }
  return client_impl_->node_table().MarkDisconnected(node_id, write_done);
}
|
||||
|
||||
// Subscribes to node membership changes via the node table.
// `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
    const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  return client_impl_->node_table().SubscribeToNodeChange(subscribe, done);
}
|
||||
|
||||
// Asynchronously fetches node info from the node table.
//
// The looked-up entries are walked from last to first and only the first
// occurrence of each node id is kept, so for every node the entry nearest the
// end of the lookup result is the one reported. The de-duplicated list is
// passed to `callback` with Status::OK().
//
// \param callback Must be non-null.
// \return The status of NodeTable::Lookup().
Status RedisNodeInfoAccessor::AsyncGetAll(
    const MultiItemCallback<GcsNodeInfo> &callback) {
  RAY_CHECK(callback != nullptr);
  auto on_done = [callback](RedisGcsClient *client, const NodeID &id,
                            const std::vector<GcsNodeInfo> &data) {
    std::vector<GcsNodeInfo> result;
    std::set<std::string> node_ids;
    // Reverse iterators replace the original `int index = data.size() - 1`
    // loop, which narrowed size_t to int.
    for (auto it = data.rbegin(); it != data.rend(); ++it) {
      if (node_ids.insert(it->node_id()).second) {
        result.emplace_back(*it);
      }
    }
    callback(Status::OK(), result);
  };
  NodeTable &node_table = client_impl_->node_table();
  return node_table.Lookup(on_done);
}
|
||||
|
||||
// Returns the cached info of `node_id` from the node table, or an empty
// optional when the table does not know the node.
// NOTE(review): `filter_dead_nodes` is not consulted by this implementation —
// confirm whether dead nodes are meant to be filtered here.
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const NodeID &node_id,
                                                        bool filter_dead_nodes) const {
  GcsNodeInfo info;
  boost::optional<GcsNodeInfo> maybe_node;
  if (client_impl_->node_table().GetNode(node_id, &info)) {
    maybe_node = std::move(info);
  }
  return maybe_node;
}
|
||||
|
||||
// Returns a reference to the node table's map of all known nodes.
const std::unordered_map<NodeID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
  return client_impl_->node_table().GetAllNodes();
}
|
||||
|
||||
// Reports whether the node table considers `node_id` removed.
bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
  return client_impl_->node_table().IsRemoved(node_id);
}
|
||||
// Writes a heartbeat record to the heartbeat table, keyed by the node id
// stored in the record. If `callback` is non-null it is invoked with
// Status::OK() when the write callback fires.
Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
    const std::shared_ptr<HeartbeatTableData> &data_ptr, const StatusCallback &callback) {
  HeartbeatTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                            const HeartbeatTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  const NodeID sender = NodeID::FromBinary(data_ptr->node_id());
  HeartbeatTable &table = client_impl_->heartbeat_table();
  return table.Add(JobID::Nil(), sender, data_ptr, write_done);
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisNodeInfoAccessor::AsyncReportResourceUsage(
    const std::shared_ptr<rpc::ResourcesData> & /*data_ptr*/,
    const StatusCallback & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Intentionally a no-op in the Redis accessor.
void RedisNodeInfoAccessor::AsyncReReportResourceUsage() {
}
|
||||
|
||||
// Subscribes to all batched resource-usage updates, using NodeID::Nil() as the
// subscriber id. The node id delivered with each notification is dropped and
// only the batch data is passed on to `subscribe`, which must be non-null.
Status RedisNodeInfoAccessor::AsyncSubscribeBatchedResourceUsage(
    const ItemCallback<ResourceUsageBatchData> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto forward_batch = [subscribe](const NodeID & /*node_id*/,
                                   const ResourceUsageBatchData &batch) {
    subscribe(batch);
  };

  auto &executor = resource_usage_batch_sub_executor_;
  return executor.AsyncSubscribeAll(NodeID::Nil(), forward_batch, done);
}
|
||||
|
||||
// Stores the client pointer and binds the resource subscription executor to
// the client's dynamic resource table.
//
// The executor is initialised from the constructor parameter rather than the
// `client_impl_` member: members are initialised in declaration order, so
// reading the member here would only be valid if it is declared before
// `resource_sub_executor_`. Using the parameter removes that dependency and
// matches the other accessor constructors in this file.
RedisNodeResourceInfoAccessor::RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl), resource_sub_executor_(client_impl->resource_table()) {}
|
||||
|
||||
// Asynchronously looks up the resources of `node_id` in the resource table.
// `callback` receives Status::OK() and the resource map, or an empty optional
// when the returned map is empty.
Status RedisNodeResourceInfoAccessor::AsyncGetResources(
    const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
  RAY_CHECK(callback != nullptr);
  auto deliver = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                            const ResourceMap &resources) {
    boost::optional<ResourceMap> maybe_resources;
    if (!resources.empty()) {
      maybe_resources = resources;
    }
    callback(Status::OK(), maybe_resources);
  };

  DynamicResourceTable &table = client_impl_->resource_table();
  return table.Lookup(JobID::Nil(), node_id, deliver);
}
|
||||
|
||||
// Updates the resources of `node_id` in the resource table. If `callback` is
// non-null it is invoked with Status::OK() when the hash callback fires.
Status RedisNodeResourceInfoAccessor::AsyncUpdateResources(
    const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) {
  Hash<NodeID, ResourceTableData>::HashCallback update_done = nullptr;
  if (callback != nullptr) {
    update_done = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                             const ResourceMap & /*updated*/) {
      callback(Status::OK());
    };
  }

  DynamicResourceTable &table = client_impl_->resource_table();
  return table.Update(JobID::Nil(), node_id, resources, update_done);
}
|
||||
|
||||
// Removes the named resources of `node_id` from the resource table. If
// `callback` is non-null it is invoked with Status::OK() when the removal
// callback fires.
Status RedisNodeResourceInfoAccessor::AsyncDeleteResources(
    const NodeID &node_id, const std::vector<std::string> &resource_names,
    const StatusCallback &callback) {
  Hash<NodeID, ResourceTableData>::HashRemoveCallback remove_done = nullptr;
  if (callback != nullptr) {
    remove_done = [callback](RedisGcsClient * /*client*/, const NodeID & /*id*/,
                             const std::vector<std::string> & /*removed*/) {
      callback(Status::OK());
    };
  }

  DynamicResourceTable &table = client_impl_->resource_table();
  return table.RemoveEntries(JobID::Nil(), node_id, resource_names, remove_done);
}
|
||||
|
||||
// Subscribes to resource changes of all nodes, using NodeID::Nil() as the
// subscriber id. Each notification is converted to an rpc::NodeResourceChange
// tagged with the node id: for an "added" notification every entry becomes an
// updated resource (name -> capacity); otherwise every entry name is listed as
// a deleted resource. `subscribe` must be non-null.
Status RedisNodeResourceInfoAccessor::AsyncSubscribeToResources(
    const ItemCallback<rpc::NodeResourceChange> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto translate = [subscribe](const NodeID &id,
                               const ResourceChangeNotification &notification) {
    rpc::NodeResourceChange change;
    change.set_node_id(id.Binary());
    const bool added = notification.IsAdded();
    for (auto &entry : notification.GetData()) {
      if (added) {
        (*change.mutable_updated_resources())[entry.first] =
            entry.second->resource_capacity();
      } else {
        change.add_deleted_resources(entry.first);
      }
    }
    subscribe(change);
  };
  return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), translate, done);
}
|
||||
|
||||
// The client pointer is accepted for interface symmetry but not stored.
RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient * /*client_impl*/) {
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisErrorInfoAccessor::AsyncReportJobError(
    const std::shared_ptr<ErrorTableData> & /*data_ptr*/,
    const StatusCallback & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Stores the client pointer used to reach the profile table.
RedisStatsInfoAccessor::RedisStatsInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl) {
}
|
||||
|
||||
// Appends a profile record to the profile table under a freshly generated
// random UniqueID. If `callback` is non-null it is invoked with Status::OK()
// when the write callback fires.
Status RedisStatsInfoAccessor::AsyncAddProfileData(
    const std::shared_ptr<ProfileTableData> &data_ptr, const StatusCallback &callback) {
  ProfileTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const UniqueID & /*id*/,
                            const ProfileTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  ProfileTable &table = client_impl_->profile_table();
  return table.Append(JobID::Nil(), UniqueID::FromRandom(), data_ptr, write_done);
}
|
||||
|
||||
// Stores the client pointer and binds the worker-failure subscription executor
// to the client's worker table.
RedisWorkerInfoAccessor::RedisWorkerInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      worker_failure_sub_executor_(client_impl->worker_table()) {
}
|
||||
|
||||
// Subscribes to all worker failure notifications, using NodeID::Nil() as the
// subscriber id. `subscribe` must be non-null; `done` is forwarded unchanged.
Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures(
    const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = worker_failure_sub_executor_;
  return executor.AsyncSubscribeAll(NodeID::Nil(), subscribe, done);
}
|
||||
|
||||
// Writes a worker failure record to the worker table, keyed by the worker id
// taken from the record's worker address. If `callback` is non-null it is
// invoked with Status::OK() when the write callback fires.
Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure(
    const std::shared_ptr<WorkerTableData> &data_ptr, const StatusCallback &callback) {
  WorkerTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient * /*client*/, const WorkerID & /*id*/,
                            const WorkerTableData & /*written*/) {
      callback(Status::OK());
    };
  }

  const WorkerID failed_worker =
      WorkerID::FromBinary(data_ptr->worker_address().worker_id());
  WorkerTable &table = client_impl_->worker_table();
  return table.Add(JobID::Nil(), failed_worker, data_ptr, write_done);
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisWorkerInfoAccessor::AsyncGet(
    const WorkerID & /*worker_id*/,
    const OptionalItemCallback<rpc::WorkerTableData> & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisWorkerInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::WorkerTableData> & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisWorkerInfoAccessor::AsyncAdd(
    const std::shared_ptr<rpc::WorkerTableData> & /*data_ptr*/,
    const StatusCallback & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status.
Status RedisPlacementGroupInfoAccessor::AsyncCreatePlacementGroup(
    const PlacementGroupSpecification & /*placement_group_spec*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisPlacementGroupInfoAccessor::AsyncRemovePlacementGroup(
    const PlacementGroupID & /*placement_group_id*/,
    const StatusCallback & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisPlacementGroupInfoAccessor::AsyncGet(
    const PlacementGroupID & /*placement_group_id*/,
    const OptionalItemCallback<rpc::PlacementGroupTableData> & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisPlacementGroupInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::PlacementGroupTableData> & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
// Not implemented for the Redis accessor; always returns an Invalid status
// and never invokes the callback.
Status RedisPlacementGroupInfoAccessor::AsyncWaitUntilReady(
    const PlacementGroupID & /*placement_group_id*/,
    const StatusCallback & /*callback*/) {
  return Status::Invalid("Not implemented");
}
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,491 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/task/task_spec.h"
|
||||
#include "ray/gcs/accessor.h"
|
||||
#include "ray/gcs/callback.h"
|
||||
#include "ray/gcs/subscription_executor.h"
|
||||
#include "ray/gcs/tables.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
class RedisGcsClient;
|
||||
|
||||
/// \class RedisLogBasedActorInfoAccessor
|
||||
/// `RedisLogBasedActorInfoAccessor` is an implementation of `ActorInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor {
|
||||
public:
|
||||
explicit RedisLogBasedActorInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisLogBasedActorInfoAccessor() {}
|
||||
|
||||
Status GetAll(std::vector<ActorTableData> *actor_table_data_list) override;
|
||||
|
||||
Status AsyncGet(const ActorID &actor_id,
|
||||
const OptionalItemCallback<ActorTableData> &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::ActorTableData> &callback) override {
|
||||
return Status::NotImplemented(
|
||||
"RedisLogBasedActorInfoAccessor does not support AsyncGetAll.");
|
||||
}
|
||||
|
||||
Status AsyncGetByName(const std::string &name,
|
||||
const OptionalItemCallback<ActorTableData> &callback) override {
|
||||
return Status::NotImplemented(
|
||||
"RedisLogBasedActorInfoAccessor does not support named detached actors.");
|
||||
}
|
||||
|
||||
Status AsyncRegisterActor(const TaskSpecification &task_spec,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncCreateActor(const TaskSpecification &task_spec,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeAll(const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncSubscribe(const ActorID &actor_id,
|
||||
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncUnsubscribe(const ActorID &actor_id) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
bool IsActorUnsubscribed(const ActorID &actor_id) override { return false; }
|
||||
|
||||
protected:
|
||||
virtual std::vector<ActorID> GetAllActorID() const;
|
||||
virtual Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const;
|
||||
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
// Use a random NodeID for actor subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all actors' updates to this GCS Client.
|
||||
// Even we can filter out irrelevant updates, but there will be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, so we use
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
private:
|
||||
typedef SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>
|
||||
ActorSubscriptionExecutor;
|
||||
ActorSubscriptionExecutor log_based_actor_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisActorInfoAccessor
|
||||
/// `RedisActorInfoAccessor` is an implementation of `ActorInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisActorInfoAccessor : public RedisLogBasedActorInfoAccessor {
|
||||
public:
|
||||
explicit RedisActorInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisActorInfoAccessor() {}
|
||||
|
||||
Status AsyncGet(const ActorID &actor_id,
|
||||
const OptionalItemCallback<ActorTableData> &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::ActorTableData> &callback) override;
|
||||
|
||||
Status AsyncGetByName(const std::string &name,
|
||||
const OptionalItemCallback<ActorTableData> &callback) override {
|
||||
return Status::NotImplemented(
|
||||
"RedisActorInfoAccessor does not support named detached actors.");
|
||||
}
|
||||
|
||||
Status AsyncSubscribeAll(const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncSubscribe(const ActorID &actor_id,
|
||||
const SubscribeCallback<ActorID, ActorTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncUnsubscribe(const ActorID &actor_id) override;
|
||||
|
||||
protected:
|
||||
std::vector<ActorID> GetAllActorID() const override;
|
||||
Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const override;
|
||||
|
||||
private:
|
||||
typedef SubscriptionExecutor<ActorID, ActorTableData, ActorTable>
|
||||
ActorSubscriptionExecutor;
|
||||
ActorSubscriptionExecutor actor_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisJobInfoAccessor
|
||||
/// RedisJobInfoAccessor is an implementation of `JobInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisJobInfoAccessor : public JobInfoAccessor {
|
||||
public:
|
||||
explicit RedisJobInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisJobInfoAccessor() {}
|
||||
|
||||
Status AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeAll(const SubscribeCallback<JobID, JobTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::JobTableData> &callback) override {
|
||||
return Status::NotImplemented("AsyncGetAll not implemented");
|
||||
}
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
private:
|
||||
/// Append job information to GCS asynchronously.
|
||||
///
|
||||
/// \param data_ptr The job information that will be appended to GCS.
|
||||
/// \param callback Callback that will be called after append done.
|
||||
/// \return Status
|
||||
Status DoAsyncAppend(const std::shared_ptr<JobTableData> &data_ptr,
|
||||
const StatusCallback &callback);
|
||||
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
typedef SubscriptionExecutor<JobID, JobTableData, JobTable> JobSubscriptionExecutor;
|
||||
JobSubscriptionExecutor job_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisTaskInfoAccessor
|
||||
/// `RedisTaskInfoAccessor` is an implementation of `TaskInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisTaskInfoAccessor : public TaskInfoAccessor {
|
||||
public:
|
||||
explicit RedisTaskInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisTaskInfoAccessor() {}
|
||||
|
||||
Status AsyncAdd(const std::shared_ptr<TaskTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGet(const TaskID &task_id,
|
||||
const OptionalItemCallback<TaskTableData> &callback) override;
|
||||
|
||||
Status AsyncSubscribe(const TaskID &task_id,
|
||||
const SubscribeCallback<TaskID, TaskTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncUnsubscribe(const TaskID &task_id) override;
|
||||
|
||||
Status AsyncAddTaskLease(const std::shared_ptr<TaskLeaseData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetTaskLease(const TaskID &task_id,
|
||||
const OptionalItemCallback<TaskLeaseData> &callback) override;
|
||||
|
||||
Status AsyncSubscribeTaskLease(
|
||||
const TaskID &task_id,
|
||||
const SubscribeCallback<TaskID, boost::optional<TaskLeaseData>> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncUnsubscribeTaskLease(const TaskID &task_id) override;
|
||||
|
||||
Status AttemptTaskReconstruction(
|
||||
const std::shared_ptr<TaskReconstructionData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
bool IsTaskUnsubscribed(const TaskID &task_id) override { return false; }
|
||||
|
||||
bool IsTaskLeaseUnsubscribed(const TaskID &task_id) override { return false; }
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
// Use a random NodeID for task subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all tasks' updates to this GCS Client.
|
||||
// Even we can filter out irrelevant updates, but there will be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, so we use
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
typedef SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>
|
||||
TaskSubscriptionExecutor;
|
||||
TaskSubscriptionExecutor task_sub_executor_;
|
||||
|
||||
typedef SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>, TaskLeaseTable>
|
||||
TaskLeaseSubscriptionExecutor;
|
||||
TaskLeaseSubscriptionExecutor task_lease_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisObjectInfoAccessor
|
||||
/// RedisObjectInfoAccessor is an implementation of `ObjectInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisObjectInfoAccessor : public ObjectInfoAccessor {
|
||||
public:
|
||||
explicit RedisObjectInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisObjectInfoAccessor() {}
|
||||
|
||||
Status AsyncGetLocations(
|
||||
const ObjectID &object_id,
|
||||
const OptionalItemCallback<rpc::ObjectLocationInfo> &callback) override;
|
||||
|
||||
Status AsyncGetAll(
|
||||
const MultiItemCallback<rpc::ObjectLocationInfo> &callback) override {
|
||||
return Status::NotImplemented("AsyncGetAll not implemented");
|
||||
}
|
||||
|
||||
Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncAddSpilledUrl(const ObjectID &object_id, const std::string &spilled_url,
|
||||
const StatusCallback &callback) override {
|
||||
return Status::NotImplemented("AsyncAddSpilledUrl not implemented");
|
||||
}
|
||||
|
||||
Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeToLocations(
|
||||
const ObjectID &object_id,
|
||||
const SubscribeCallback<ObjectID, std::vector<rpc::ObjectLocationChange>>
|
||||
&subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncUnsubscribeToLocations(const ObjectID &object_id) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
bool IsObjectUnsubscribed(const ObjectID &object_id) override { return false; }
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
// Use a random NodeID for object subscription. Because:
|
||||
// If we use NodeID::Nil, GCS will still send all objects' updates to this GCS Client.
|
||||
// Even we can filter out irrelevant updates, but there will be extra overhead.
|
||||
// And because the new GCS Client will no longer hold the local NodeID, so we use
|
||||
// random NodeID instead.
|
||||
// TODO(micafan): Remove this random id, once GCS becomes a service.
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
|
||||
typedef SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>
|
||||
ObjectSubscriptionExecutor;
|
||||
ObjectSubscriptionExecutor object_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisNodeInfoAccessor
|
||||
/// RedisNodeInfoAccessor is an implementation of `NodeInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisNodeInfoAccessor : public NodeInfoAccessor {
|
||||
public:
|
||||
explicit RedisNodeInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisNodeInfoAccessor() {}
|
||||
|
||||
Status RegisterSelf(const GcsNodeInfo &local_node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status UnregisterSelf() override;
|
||||
|
||||
const NodeID &GetSelfId() const override;
|
||||
|
||||
const GcsNodeInfo &GetSelfInfo() const override;
|
||||
|
||||
Status AsyncRegister(const GcsNodeInfo &node_info,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
|
||||
|
||||
Status AsyncSubscribeToNodeChange(
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
boost::optional<GcsNodeInfo> Get(const NodeID &node_id,
|
||||
bool filter_dead_nodes = true) const override;
|
||||
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
|
||||
|
||||
bool IsRemoved(const NodeID &node_id) const override;
|
||||
|
||||
Status AsyncReportHeartbeat(const std::shared_ptr<HeartbeatTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncReportResourceUsage(const std::shared_ptr<rpc::ResourcesData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
void AsyncReReportResourceUsage() override;
|
||||
|
||||
Status AsyncGetAllResourceUsage(
|
||||
const ItemCallback<rpc::ResourceUsageBatchData> &callback) override {
|
||||
return Status::NotImplemented("AsyncGetAllResourceUsage not implemented");
|
||||
}
|
||||
|
||||
Status AsyncSubscribeBatchedResourceUsage(
|
||||
const ItemCallback<ResourceUsageBatchData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
Status AsyncSetInternalConfig(
|
||||
std::unordered_map<std::string, std::string> &config) override {
|
||||
return Status::NotImplemented("SetInternaConfig not implemented.");
|
||||
}
|
||||
|
||||
Status AsyncGetInternalConfig(
|
||||
const OptionalItemCallback<std::unordered_map<std::string, std::string>> &callback)
|
||||
override {
|
||||
return Status::NotImplemented("GetInternalConfig not implemented.");
|
||||
}
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
typedef SubscriptionExecutor<NodeID, ResourceUsageBatchData, ResourceUsageBatchTable>
|
||||
HeartbeatBatchSubscriptionExecutor;
|
||||
HeartbeatBatchSubscriptionExecutor resource_usage_batch_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisNodeResourceInfoAccessor
|
||||
/// RedisNodeResourceInfoAccessor is an implementation of `NodeResourceInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisNodeResourceInfoAccessor : public NodeResourceInfoAccessor {
|
||||
public:
|
||||
explicit RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisNodeResourceInfoAccessor() {}
|
||||
|
||||
Status AsyncGetResources(const NodeID &node_id,
|
||||
const OptionalItemCallback<ResourceMap> &callback) override;
|
||||
|
||||
Status AsyncGetAllAvailableResources(
|
||||
const MultiItemCallback<rpc::AvailableResources> &callback) override {
|
||||
return Status::NotImplemented("AsyncGetAllAvailableResources not implemented");
|
||||
}
|
||||
|
||||
Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncDeleteResources(const NodeID &node_id,
|
||||
const std::vector<std::string> &resource_names,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncSubscribeToResources(const ItemCallback<rpc::NodeResourceChange> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
typedef SubscriptionExecutor<NodeID, ResourceChangeNotification, DynamicResourceTable>
|
||||
DynamicResourceSubscriptionExecutor;
|
||||
DynamicResourceSubscriptionExecutor resource_sub_executor_;
|
||||
};
|
||||
|
||||
/// \class RedisErrorInfoAccessor
|
||||
/// RedisErrorInfoAccessor is an implementation of `ErrorInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisErrorInfoAccessor : public ErrorInfoAccessor {
|
||||
public:
|
||||
explicit RedisErrorInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisErrorInfoAccessor() = default;
|
||||
|
||||
Status AsyncReportJobError(const std::shared_ptr<ErrorTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
};
|
||||
|
||||
/// \class RedisStatsInfoAccessor
|
||||
/// RedisStatsInfoAccessor is an implementation of `StatsInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisStatsInfoAccessor : public StatsInfoAccessor {
|
||||
public:
|
||||
explicit RedisStatsInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisStatsInfoAccessor() = default;
|
||||
|
||||
Status AsyncAddProfileData(const std::shared_ptr<ProfileTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::ProfileTableData> &callback) override {
|
||||
return Status::NotImplemented("AsyncGetAll not implemented");
|
||||
}
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
};
|
||||
|
||||
/// \class RedisWorkerInfoAccessor
|
||||
/// RedisWorkerInfoAccessor is an implementation of `WorkerInfoAccessor`
|
||||
/// that uses Redis as the backend storage.
|
||||
class RedisWorkerInfoAccessor : public WorkerInfoAccessor {
|
||||
public:
|
||||
explicit RedisWorkerInfoAccessor(RedisGcsClient *client_impl);
|
||||
|
||||
virtual ~RedisWorkerInfoAccessor() = default;
|
||||
|
||||
Status AsyncSubscribeToWorkerFailures(
|
||||
const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
|
||||
const StatusCallback &done) override;
|
||||
|
||||
Status AsyncReportWorkerFailure(const std::shared_ptr<WorkerTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGet(const WorkerID &worker_id,
|
||||
const OptionalItemCallback<rpc::WorkerTableData> &callback) override;
|
||||
|
||||
Status AsyncGetAll(const MultiItemCallback<rpc::WorkerTableData> &callback) override;
|
||||
|
||||
Status AsyncAdd(const std::shared_ptr<rpc::WorkerTableData> &data_ptr,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
void AsyncResubscribe(bool is_pubsub_server_restarted) override {}
|
||||
|
||||
private:
|
||||
RedisGcsClient *client_impl_{nullptr};
|
||||
|
||||
typedef SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>
|
||||
WorkerFailureSubscriptionExecutor;
|
||||
WorkerFailureSubscriptionExecutor worker_failure_sub_executor_;
|
||||
};
|
||||
|
||||
class RedisPlacementGroupInfoAccessor : public PlacementGroupInfoAccessor {
|
||||
public:
|
||||
virtual ~RedisPlacementGroupInfoAccessor() = default;
|
||||
|
||||
Status AsyncCreatePlacementGroup(
|
||||
const PlacementGroupSpecification &placement_group_spec) override;
|
||||
|
||||
Status AsyncRemovePlacementGroup(const PlacementGroupID &placement_group_id,
|
||||
const StatusCallback &callback) override;
|
||||
|
||||
Status AsyncGet(
|
||||
const PlacementGroupID &placement_group_id,
|
||||
const OptionalItemCallback<rpc::PlacementGroupTableData> &callback) override;
|
||||
|
||||
Status AsyncGetAll(
|
||||
const MultiItemCallback<rpc::PlacementGroupTableData> &callback) override;
|
||||
|
||||
Status AsyncWaitUntilReady(const PlacementGroupID &placement_group_id,
|
||||
const StatusCallback &callback) override;
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,144 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
|
||||
#include "ray/common/ray_config.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_context.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
/// Convenience constructor: delegates to the two-argument constructor with
/// `CommandType::kRegular`.
///
/// \param options Options of this client.
RedisGcsClient::RedisGcsClient(const GcsClientOptions &options)
    : RedisGcsClient(options, CommandType::kRegular) {
}
|
||||
|
||||
/// Stores the command type and creates the underlying `RedisClient`. Nothing is
/// connected here; that happens in `Connect()`.
///
/// \param options Options of this client; its server IP, port, password and
/// test-client flag are forwarded to the `RedisClient`.
/// \param command_type The command type recorded in `command_type_`.
RedisGcsClient::RedisGcsClient(const GcsClientOptions &options, CommandType command_type)
    : GcsClient(options), command_type_(command_type) {
  RedisClientOptions client_options(options.server_ip_, options.server_port_,
                                    options.password_, options.is_test_client_);
  redis_client_.reset(new RedisClient(client_options));
}
|
||||
|
||||
/// Connects the underlying Redis client, then creates every table and every
/// accessor of this GCS client.
///
/// Aborts (RAY_CHECK) when the client is already connected.
///
/// \param io_service Event loop handed to `RedisClient::Connect`.
/// \return The Redis client's failure status when it cannot connect (no table or
/// accessor is created in that case), `Status::OK()` otherwise.
Status RedisGcsClient::Connect(boost::asio::io_service &io_service) {
  RAY_CHECK(!is_connected_);

  Status connect_status = redis_client_->Connect(io_service);
  if (!connect_status.ok()) {
    RAY_LOG(INFO) << "RedisGcsClient::Connect failed, status "
                  << connect_status.ToString();
    return connect_status;
  }

  std::shared_ptr<RedisContext> primary = redis_client_->GetPrimaryContext();
  std::vector<std::shared_ptr<RedisContext>> shards = redis_client_->GetShardContexts();

  // --- Tables bound to the primary context only. ---
  log_based_actor_table_.reset(new LogBasedActorTable({primary}, this));
  actor_table_.reset(new ActorTable({primary}, this));

  // TODO(micafan) Modify NodeTable's constructor with respect to NodeID in future.
  // For worker/driver, it might not have this field (NodeID).
  // For raylet, NodeID should be initialized in the raylet layer (not here).
  node_table_.reset(new NodeTable({primary}, this));

  job_table_.reset(new JobTable({primary}, this));
  resource_usage_batch_table_.reset(new ResourceUsageBatchTable({primary}, this));

  // --- Tables spread over the shard contexts. ---
  object_table_.reset(new ObjectTable(shards, this));
  raylet_task_table_.reset(new raylet::TaskTable(shards, this, command_type_));
  task_reconstruction_log_.reset(new TaskReconstructionLog(shards, this));
  task_lease_table_.reset(new TaskLeaseTable(shards, this));
  heartbeat_table_.reset(new HeartbeatTable(shards, this));
  profile_table_.reset(new ProfileTable(shards, this));
  // The dynamic resource table is built on the primary context, not the shards.
  resource_table_.reset(new DynamicResourceTable({primary}, this));
  worker_table_.reset(new WorkerTable(shards, this));

  // --- Accessors; each one except the placement group accessor is given `this`. ---
  actor_accessor_.reset(new RedisActorInfoAccessor(this));

  job_accessor_.reset(new RedisJobInfoAccessor(this));
  object_accessor_.reset(new RedisObjectInfoAccessor(this));
  node_accessor_.reset(new RedisNodeInfoAccessor(this));
  node_resource_accessor_.reset(new RedisNodeResourceInfoAccessor(this));
  task_accessor_.reset(new RedisTaskInfoAccessor(this));
  error_accessor_.reset(new RedisErrorInfoAccessor(this));
  stats_accessor_.reset(new RedisStatsInfoAccessor(this));
  worker_accessor_.reset(new RedisWorkerInfoAccessor(this));
  placement_group_accessor_.reset(new RedisPlacementGroupInfoAccessor());

  is_connected_ = true;

  RAY_LOG(DEBUG) << "RedisGcsClient connected.";

  return Status::OK();
}
|
||||
|
||||
/// Marks this client as disconnected and disconnects the underlying Redis
/// client. Aborts (RAY_CHECK) when the client is not currently connected.
void RedisGcsClient::Disconnect() {
  RAY_CHECK(is_connected_);

  // The flag is cleared before the Redis client is torn down.
  is_connected_ = false;
  redis_client_->Disconnect();

  RAY_LOG(DEBUG) << "RedisGcsClient Disconnected.";
}
|
||||
|
||||
/// Builds a multi-line report: a "RedisGcsClient:" header followed by one
/// "- <Table>: <table debug string>" line for each table listed below.
///
/// Every table pointer is dereferenced unconditionally.
///
/// \return The assembled report.
std::string RedisGcsClient::DebugString() const {
  std::stringstream report;
  report << "RedisGcsClient:"
         << "\n- TaskTable: " << raylet_task_table_->DebugString()
         << "\n- LogBasedActorTable: " << log_based_actor_table_->DebugString()
         << "\n- ActorTable: " << actor_table_->DebugString()
         << "\n- TaskReconstructionLog: " << task_reconstruction_log_->DebugString()
         << "\n- TaskLeaseTable: " << task_lease_table_->DebugString()
         << "\n- HeartbeatTable: " << heartbeat_table_->DebugString()
         << "\n- ProfileTable: " << profile_table_->DebugString()
         << "\n- NodeTable: " << node_table_->DebugString()
         << "\n- JobTable: " << job_table_->DebugString();
  return report.str();
}
|
||||
|
||||
ObjectTable &RedisGcsClient::object_table() { return *object_table_; }
|
||||
|
||||
raylet::TaskTable &RedisGcsClient::raylet_task_table() { return *raylet_task_table_; }
|
||||
|
||||
LogBasedActorTable &RedisGcsClient::log_based_actor_table() {
|
||||
return *log_based_actor_table_;
|
||||
}
|
||||
|
||||
ActorTable &RedisGcsClient::actor_table() { return *actor_table_; }
|
||||
|
||||
WorkerTable &RedisGcsClient::worker_table() { return *worker_table_; }
|
||||
|
||||
TaskReconstructionLog &RedisGcsClient::task_reconstruction_log() {
|
||||
return *task_reconstruction_log_;
|
||||
}
|
||||
|
||||
TaskLeaseTable &RedisGcsClient::task_lease_table() { return *task_lease_table_; }
|
||||
|
||||
NodeTable &RedisGcsClient::node_table() { return *node_table_; }
|
||||
|
||||
HeartbeatTable &RedisGcsClient::heartbeat_table() { return *heartbeat_table_; }
|
||||
|
||||
ResourceUsageBatchTable &RedisGcsClient::resource_usage_batch_table() {
|
||||
return *resource_usage_batch_table_;
|
||||
}
|
||||
|
||||
JobTable &RedisGcsClient::job_table() { return *job_table_; }
|
||||
|
||||
ProfileTable &RedisGcsClient::profile_table() { return *profile_table_; }
|
||||
|
||||
DynamicResourceTable &RedisGcsClient::resource_table() { return *resource_table_; }
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,131 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/gcs/asio.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/gcs/redis_client.h"
|
||||
#include "ray/gcs/tables.h"
|
||||
#include "ray/util/logging.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
class RedisContext;
|
||||
|
||||
/// \class RedisGcsClient
/// `GcsClient` implementation that holds a `RedisClient` together with the GCS
/// tables declared at the bottom of this class.
class RAY_EXPORT RedisGcsClient : public GcsClient {
 public:
  /// Constructor of RedisGcsClient.
  /// Connect() must be called (and return ok) before any other method is used.
  /// TODO(micafan) To read and write from the GCS tables requires a further
  /// call to Connect() to the client table. Will fix this in next pr.
  ///
  /// \param options Options of this client, e.g. server address, password and so on.
  RedisGcsClient(const GcsClientOptions &options);

  /// This constructor is only used for testing.
  /// Connect() must be called (and return ok) before any other method is used.
  ///
  /// \param options Options of this client, e.g. server address, password and so on.
  /// \param command_type The type of the commands issued.
  RedisGcsClient(const GcsClientOptions &options, CommandType command_type);

  /// Connect to GCS Service. Not thread safe.
  /// Call this function before calling other functions.
  ///
  /// \param io_service The event loop for this client.
  /// Must be a single-threaded io_service (get more information from
  /// RedisAsioClient).
  ///
  /// \return Status
  Status Connect(boost::asio::io_service &io_service) override;

  /// Disconnect from GCS Service. Not thread safe.
  void Disconnect() override;

  /// Returns the debug string of this class.
  ///
  /// \return string.
  std::string DebugString() const override;

  // We also need something to export generic code to run on workers from the
  // driver (to set the PYTHONPATH).
  // NOTE(review): no definition of AddExport/GetExport appears in
  // redis_gcs_client.cc -- confirm whether they are defined elsewhere.
  using GetExportCallback = std::function<void(const std::string &data)>;
  Status AddExport(const std::string &job_id, std::string &export_data);
  Status GetExport(const std::string &job_id, int64_t export_index,
                   const GetExportCallback &done_callback);

  /// \return The shard contexts of the underlying Redis client.
  std::vector<std::shared_ptr<RedisContext>> shard_contexts() {
    return redis_client_->GetShardContexts();
  }

  /// \return The primary context of the underlying Redis client.
  std::shared_ptr<RedisContext> primary_context() {
    return redis_client_->GetPrimaryContext();
  }

  /// \return The shared handle to the underlying Redis client.
  std::shared_ptr<RedisClient> GetRedisClient() const { return redis_client_; }

  /// The following xxx_table methods implement the Accessor interfaces.
  /// Implements the Actors() interface.
  LogBasedActorTable &log_based_actor_table();
  ActorTable &actor_table();
  /// Implements the Jobs() interface.
  JobTable &job_table();
  /// Implements the Objects() interface.
  ObjectTable &object_table();
  /// Implements the Nodes() interface.
  NodeTable &node_table();
  HeartbeatTable &heartbeat_table();
  ResourceUsageBatchTable &resource_usage_batch_table();
  DynamicResourceTable &resource_table();
  /// Implements the Tasks() interface.
  virtual raylet::TaskTable &raylet_task_table();
  TaskLeaseTable &task_lease_table();
  TaskReconstructionLog &task_reconstruction_log();
  /// Implements the Stats() interface.
  ProfileTable &profile_table();
  /// Implements the Workers() interface.
  WorkerTable &worker_table();

 private:
  // GCS command type. If CommandType::kChain, chain-replicated versions of the
  // tables might be used, if available.
  CommandType command_type_{CommandType::kUnknown};

  // Created in the constructor; shared with callers through GetRedisClient().
  std::shared_ptr<RedisClient> redis_client_;

  // Tables owned by this client. They stay empty until Connect() creates them.
  std::unique_ptr<ObjectTable> object_table_;
  std::unique_ptr<raylet::TaskTable> raylet_task_table_;
  std::unique_ptr<LogBasedActorTable> log_based_actor_table_;
  std::unique_ptr<ActorTable> actor_table_;
  std::unique_ptr<TaskReconstructionLog> task_reconstruction_log_;
  std::unique_ptr<TaskLeaseTable> task_lease_table_;
  std::unique_ptr<HeartbeatTable> heartbeat_table_;
  std::unique_ptr<ResourceUsageBatchTable> resource_usage_batch_table_;
  std::unique_ptr<ProfileTable> profile_table_;
  std::unique_ptr<NodeTable> node_table_;
  std::unique_ptr<DynamicResourceTable> resource_table_;
  std::unique_ptr<WorkerTable> worker_table_;
  std::unique_ptr<JobTable> job_table_;
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,215 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/gcs/subscription_executor.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
|
||||
const NodeID &node_id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
// TODO(micafan) Optimize the lock when necessary.
|
||||
// Consider avoiding locking in single-threaded processes.
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
|
||||
if (subscribe_all_callback_ != nullptr) {
|
||||
RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to all elements.";
|
||||
return Status::Invalid("Duplicate subscription!");
|
||||
}
|
||||
|
||||
if (registration_status_ != RegistrationStatus::kNotRegistered) {
|
||||
if (subscribe != nullptr) {
|
||||
RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to specific elements"
|
||||
", can't subscribe to all elements.";
|
||||
return Status::Invalid("Duplicate subscription!");
|
||||
}
|
||||
}
|
||||
|
||||
if (registration_status_ == RegistrationStatus::kRegistered) {
|
||||
// Already registered to GCS, just invoke the `done` callback.
|
||||
lock.unlock();
|
||||
if (done != nullptr) {
|
||||
done(Status::OK());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Registration to GCS is not finished yet, add the `done` callback to the pending list
|
||||
// to be invoked when registration is done.
|
||||
if (done != nullptr) {
|
||||
pending_subscriptions_.emplace_back(done);
|
||||
}
|
||||
|
||||
// If there's another registration request that's already on-going, then wait for it
|
||||
// to finish.
|
||||
if (registration_status_ == RegistrationStatus::kRegistering) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
auto on_subscribe = [this](RedisGcsClient *client, const ID &id,
|
||||
const std::vector<Data> &result) {
|
||||
if (result.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
SubscribeCallback<ID, Data> sub_one_callback = nullptr;
|
||||
SubscribeCallback<ID, Data> sub_all_callback = nullptr;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
const auto it = id_to_callback_map_.find(id);
|
||||
if (it != id_to_callback_map_.end()) {
|
||||
sub_one_callback = it->second;
|
||||
}
|
||||
sub_all_callback = subscribe_all_callback_;
|
||||
}
|
||||
if (sub_one_callback != nullptr) {
|
||||
sub_one_callback(id, result.back());
|
||||
}
|
||||
if (sub_all_callback != nullptr) {
|
||||
RAY_CHECK(sub_one_callback == nullptr);
|
||||
sub_all_callback(id, result.back());
|
||||
}
|
||||
};
|
||||
|
||||
auto on_done = [this](RedisGcsClient *client) {
|
||||
std::list<StatusCallback> pending_callbacks;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
registration_status_ = RegistrationStatus::kRegistered;
|
||||
pending_callbacks.swap(pending_subscriptions_);
|
||||
RAY_CHECK(pending_subscriptions_.empty());
|
||||
}
|
||||
|
||||
for (const auto &callback : pending_callbacks) {
|
||||
callback(Status::OK());
|
||||
}
|
||||
};
|
||||
|
||||
Status status = table_.Subscribe(JobID::Nil(), node_id, on_subscribe, on_done);
|
||||
if (status.ok()) {
|
||||
registration_status_ = RegistrationStatus::kRegistering;
|
||||
subscribe_all_callback_ = subscribe;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
|
||||
const NodeID &node_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(node_id != NodeID::Nil());
|
||||
|
||||
// NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts,
|
||||
// thus we need to call `RequestNotifications` in the Subscribe callback to ensure
|
||||
// it's processed after the `Subscribe` request. Otherwise if `RequestNotifications`
|
||||
// is processed first we will miss the initial notification.
|
||||
auto on_subscribe_done = [this, node_id, id, subscribe, done](Status status) {
|
||||
auto on_request_notification_done = [this, done, id](Status status) {
|
||||
if (!status.ok()) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
id_to_callback_map_.erase(id);
|
||||
}
|
||||
if (done != nullptr) {
|
||||
done(status);
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
status = table_.RequestNotifications(JobID::Nil(), id, node_id,
|
||||
on_request_notification_done);
|
||||
if (!status.ok()) {
|
||||
id_to_callback_map_.erase(id);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
const auto it = id_to_callback_map_.find(id);
|
||||
if (it != id_to_callback_map_.end()) {
|
||||
RAY_LOG(DEBUG) << "Duplicate subscription to id " << id << " node_id " << node_id;
|
||||
return Status::Invalid("Duplicate subscription to element!");
|
||||
}
|
||||
id_to_callback_map_[id] = subscribe;
|
||||
}
|
||||
|
||||
auto status = AsyncSubscribeAll(node_id, nullptr, on_subscribe_done);
|
||||
if (!status.ok()) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
id_to_callback_map_.erase(id);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename ID, typename Data, typename Table>
|
||||
Status SubscriptionExecutor<ID, Data, Table>::AsyncUnsubscribe(
|
||||
const NodeID &node_id, const ID &id, const StatusCallback &done) {
|
||||
SubscribeCallback<ID, Data> subscribe = nullptr;
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
const auto it = id_to_callback_map_.find(id);
|
||||
if (it == id_to_callback_map_.end()) {
|
||||
RAY_LOG(DEBUG) << "Invalid Unsubscribe! id " << id << " node_id " << node_id;
|
||||
return Status::Invalid("Invalid Unsubscribe, no existing subscription found.");
|
||||
}
|
||||
subscribe = std::move(it->second);
|
||||
id_to_callback_map_.erase(it);
|
||||
}
|
||||
|
||||
RAY_CHECK(subscribe != nullptr);
|
||||
auto on_done = [this, id, subscribe, done](Status status) {
|
||||
if (!status.ok()) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
const auto it = id_to_callback_map_.find(id);
|
||||
if (it != id_to_callback_map_.end()) {
|
||||
// The initial AsyncUnsubscribe deleted the callback, but the client
|
||||
// has subscribed again in the meantime. This new callback will be
|
||||
// called if we receive more notifications.
|
||||
RAY_LOG(WARNING)
|
||||
<< "Client called AsyncSubscribe on " << id
|
||||
<< " while AsyncUnsubscribe was pending, but the unsubscribe failed.";
|
||||
} else {
|
||||
// The Unsubscribe failed, so restore the initial callback.
|
||||
id_to_callback_map_[id] = subscribe;
|
||||
}
|
||||
}
|
||||
if (done != nullptr) {
|
||||
done(status);
|
||||
}
|
||||
};
|
||||
|
||||
return table_.CancelNotifications(JobID::Nil(), id, node_id, on_done);
|
||||
}
|
||||
|
||||
// Explicit instantiations of SubscriptionExecutor, one per (ID, Data, Table)
// combination listed below. The member functions are defined in this source
// file, so each combination that is used must be instantiated here.
template class SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>;
|
||||
template class SubscriptionExecutor<ActorID, ActorTableData, ActorTable>;
|
||||
template class SubscriptionExecutor<JobID, JobTableData, JobTable>;
|
||||
template class SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>;
|
||||
template class SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>;
|
||||
template class SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>,
|
||||
TaskLeaseTable>;
|
||||
template class SubscriptionExecutor<NodeID, ResourceChangeNotification,
|
||||
DynamicResourceTable>;
|
||||
template class SubscriptionExecutor<NodeID, ResourceUsageBatchData,
|
||||
ResourceUsageBatchTable>;
|
||||
template class SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>;
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,108 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
|
||||
#include "ray/gcs/callback.h"
|
||||
#include "ray/gcs/tables.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
/// \class SubscriptionExecutor
|
||||
/// SubscriptionExecutor class encapsulates the implementation details of
|
||||
/// subscribe/unsubscribe to elements (e.g.: actors or tasks or objects or nodes).
|
||||
/// Support subscribing to a specific element or subscribing to all elements.
|
||||
template <typename ID, typename Data, typename Table>
|
||||
class SubscriptionExecutor {
|
||||
public:
|
||||
explicit SubscriptionExecutor(Table &table) : table_(table) {}
|
||||
|
||||
~SubscriptionExecutor() {}
|
||||
|
||||
/// Subscribe to operations of all elements.
|
||||
/// Repeated subscription will return a failure.
|
||||
///
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each update will be received. Else, only
|
||||
/// messages for the given node will be received.
|
||||
/// \param subscribe Callback that will be called each time when an element
|
||||
/// is registered or updated.
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncSubscribeAll(const NodeID &node_id,
|
||||
const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Subscribe to operations of an element.
|
||||
/// Repeated subscription to an element will return a failure.
|
||||
///
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each update will be received. Else, only
|
||||
/// messages for the given node will be received.
|
||||
/// \param id The id of the element to be subscribe to.
|
||||
/// \param subscribe Callback that will be called each time when the element
|
||||
/// is registered or updated.
|
||||
/// \param done Callback that will be called when subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncSubscribe(const NodeID &node_id, const ID &id,
|
||||
const SubscribeCallback<ID, Data> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Cancel subscription to an element.
|
||||
/// Unsubscribing can only be called after the subscription request is completed.
|
||||
///
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each update will be received. Else, only
|
||||
/// messages for the given node will be received.
|
||||
/// \param id The id of the element to be unsubscribed to.
|
||||
/// \param done Callback that will be called when cancel subscription is complete.
|
||||
/// \return Status
|
||||
Status AsyncUnsubscribe(const NodeID &node_id, const ID &id,
|
||||
const StatusCallback &done);
|
||||
|
||||
private:
|
||||
Table &table_;
|
||||
|
||||
std::mutex mutex_;
|
||||
|
||||
enum class RegistrationStatus : uint8_t {
|
||||
kNotRegistered,
|
||||
kRegistering,
|
||||
kRegistered,
|
||||
};
|
||||
|
||||
/// Whether successfully registered subscription to GCS.
|
||||
RegistrationStatus registration_status_{RegistrationStatus::kNotRegistered};
|
||||
|
||||
/// List of subscriptions before registration to GCS is done, these callbacks
|
||||
/// will be called when the registration to GCS finishes.
|
||||
std::list<StatusCallback> pending_subscriptions_;
|
||||
|
||||
/// Subscribe Callback of all elements.
|
||||
SubscribeCallback<ID, Data> subscribe_all_callback_{nullptr};
|
||||
|
||||
/// A mapping from element ID to subscription callback.
|
||||
typedef std::unordered_map<ID, SubscribeCallback<ID, Data>> IDToCallbackMap;
|
||||
IDToCallbackMap id_to_callback_map_;
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,847 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "ray/gcs/tables.h"
|
||||
|
||||
#include "absl/time/clock.h"
|
||||
#include "ray/common/common_protocol.h"
|
||||
#include "ray/common/grpc_util.h"
|
||||
#include "ray/common/ray_config.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
|
||||
extern "C" {
|
||||
#include "hiredis/hiredis.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
static const std::string kTableAppendCommand = "RAY.TABLE_APPEND";
|
||||
static const std::string kChainTableAppendCommand = "RAY.CHAIN.TABLE_APPEND";
|
||||
|
||||
static const std::string kTableAddCommand = "RAY.TABLE_ADD";
|
||||
static const std::string kChainTableAddCommand = "RAY.CHAIN.TABLE_ADD";
|
||||
|
||||
/// Returns the redis command name used to append to a log.
///
/// \param command_type kRegular selects the plain command, kChain the chain variant.
/// Any other value fails the RAY_CHECK.
std::string GetLogAppendCommand(const ray::gcs::CommandType command_type) {
  if (command_type != ray::gcs::CommandType::kRegular) {
    RAY_CHECK(command_type == ray::gcs::CommandType::kChain);
    return kChainTableAppendCommand;
  }
  return kTableAppendCommand;
}
|
||||
|
||||
/// Returns the redis command name used to add an entry to a table.
///
/// \param command_type kRegular selects the plain command, kChain the chain variant.
/// Any other value fails the RAY_CHECK.
std::string GetTableAddCommand(const ray::gcs::CommandType command_type) {
  if (command_type != ray::gcs::CommandType::kRegular) {
    RAY_CHECK(command_type == ray::gcs::CommandType::kChain);
    return kChainTableAddCommand;
  }
  return kTableAddCommand;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Append(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done) {
|
||||
num_appends_++;
|
||||
auto callback = [this, id, data, done](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto status = reply->ReadAsStatus();
|
||||
// Failed to append the entry.
|
||||
RAY_CHECK(status.ok()) << "Failed to execute command TABLE_APPEND:"
|
||||
<< status.ToString();
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, *data);
|
||||
}
|
||||
};
|
||||
std::string str = data->SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id, str.data(),
|
||||
str.length(), prefix_, pubsub_channel_,
|
||||
std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::SyncAppend(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data) {
|
||||
num_appends_++;
|
||||
std::string str = data->SerializeAsString();
|
||||
auto reply =
|
||||
GetRedisContext(id)->RunSync(GetLogAppendCommand(command_type_), id, str.data(),
|
||||
str.length(), prefix_, pubsub_channel_);
|
||||
Status status = reply ? reply->ReadAsStatus() : Status::RedisError("Redis error");
|
||||
return status;
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::AppendAt(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done, const WriteCallback &failure,
|
||||
int log_length) {
|
||||
num_appends_++;
|
||||
auto callback = [this, id, data, done, failure](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto status = reply->ReadAsStatus();
|
||||
if (status.ok()) {
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, *data);
|
||||
}
|
||||
} else {
|
||||
if (failure != nullptr) {
|
||||
(failure)(client_, id, *data);
|
||||
}
|
||||
}
|
||||
};
|
||||
std::string str = data->SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id, str.data(),
|
||||
str.length(), prefix_, pubsub_channel_,
|
||||
std::move(callback), log_length);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &lookup) {
|
||||
num_lookups_++;
|
||||
auto callback = [this, id, lookup](std::shared_ptr<CallbackReply> reply) {
|
||||
if (lookup != nullptr) {
|
||||
std::vector<Data> results;
|
||||
if (!reply->IsNil()) {
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.ParseFromString(reply->ReadAsString());
|
||||
RAY_CHECK(ID::FromBinary(gcs_entry.id()) == id);
|
||||
for (int64_t i = 0; i < gcs_entry.entries_size(); i++) {
|
||||
Data data;
|
||||
data.ParseFromString(gcs_entry.entries(i));
|
||||
results.emplace_back(std::move(data));
|
||||
}
|
||||
}
|
||||
lookup(client_, id, results);
|
||||
}
|
||||
};
|
||||
std::vector<uint8_t> nil;
|
||||
return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const Callback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
auto subscribe_wrapper = [subscribe](RedisGcsClient *client, const ID &id,
|
||||
const GcsChangeMode change_mode,
|
||||
const std::vector<Data> &data) {
|
||||
RAY_CHECK(change_mode != GcsChangeMode::REMOVE);
|
||||
subscribe(client, id, data);
|
||||
};
|
||||
return Subscribe(job_id, node_id, subscribe_wrapper, done);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ == -1)
|
||||
<< "Client called Subscribe twice on the same table";
|
||||
auto callback = [this, subscribe, done](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto data = reply->ReadAsPubsubData();
|
||||
|
||||
if (data.empty()) {
|
||||
// No notification data is provided. This is the callback for the
|
||||
// initial subscription request.
|
||||
if (done != nullptr) {
|
||||
done(client_);
|
||||
}
|
||||
} else {
|
||||
// Data is provided. This is the callback for a message.
|
||||
if (subscribe != nullptr) {
|
||||
// Parse the notification.
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.ParseFromString(data);
|
||||
ID id = ID::FromBinary(gcs_entry.id());
|
||||
std::vector<Data> results;
|
||||
for (int64_t i = 0; i < gcs_entry.entries_size(); i++) {
|
||||
Data result;
|
||||
result.ParseFromString(gcs_entry.entries(i));
|
||||
results.emplace_back(std::move(result));
|
||||
}
|
||||
subscribe(client_, id, gcs_entry.change_mode(), results);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
subscribe_callback_index_ = 1;
|
||||
for (auto &context : shard_contexts_) {
|
||||
RAY_RETURN_NOT_OK(context->SubscribeAsync(node_id, pubsub_channel_, callback,
|
||||
&subscribe_callback_index_));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ >= 0)
|
||||
<< "Client requested notifications on a key before Subscribe completed";
|
||||
|
||||
RedisCallback callback = nullptr;
|
||||
if (done != nullptr) {
|
||||
callback = [done](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto status = reply->IsNil()
|
||||
? Status::OK()
|
||||
: Status::RedisError("request notifications failed.");
|
||||
done(status);
|
||||
};
|
||||
}
|
||||
|
||||
return GetRedisContext(id)->RunAsync("RAY.TABLE_REQUEST_NOTIFICATIONS", id,
|
||||
node_id.Data(), node_id.Size(), prefix_,
|
||||
pubsub_channel_, callback);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Log<ID, Data>::CancelNotifications(const JobID &job_id, const ID &id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ >= 0)
|
||||
<< "Client canceled notifications on a key before Subscribe completed";
|
||||
|
||||
RedisCallback callback = nullptr;
|
||||
if (done != nullptr) {
|
||||
callback = [done](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto status = reply->ReadAsStatus();
|
||||
done(status);
|
||||
};
|
||||
}
|
||||
|
||||
return GetRedisContext(id)->RunAsync("RAY.TABLE_CANCEL_NOTIFICATIONS", id,
|
||||
node_id.Data(), node_id.Size(), prefix_,
|
||||
pubsub_channel_, callback);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
void Log<ID, Data>::Delete(const JobID &job_id, const std::vector<ID> &ids) {
|
||||
if (ids.empty()) {
|
||||
return;
|
||||
}
|
||||
std::unordered_map<RedisContext *, std::ostringstream> sharded_data;
|
||||
for (const auto &id : ids) {
|
||||
sharded_data[GetRedisContext(id).get()] << id.Binary();
|
||||
}
|
||||
// Breaking really large deletion commands into batches of smaller size.
|
||||
const size_t batch_size =
|
||||
RayConfig::instance().maximum_gcs_deletion_batch_size() * ID::Size();
|
||||
for (const auto &pair : sharded_data) {
|
||||
std::string current_data = pair.second.str();
|
||||
for (size_t cur = 0; cur < pair.second.str().size(); cur += batch_size) {
|
||||
size_t data_field_size = std::min(batch_size, current_data.size() - cur);
|
||||
uint16_t id_count = data_field_size / ID::Size();
|
||||
// Send data contains id count and all the id data.
|
||||
std::string send_data(data_field_size + sizeof(id_count), 0);
|
||||
uint8_t *buffer = reinterpret_cast<uint8_t *>(&send_data[0]);
|
||||
*reinterpret_cast<uint16_t *>(buffer) = id_count;
|
||||
RAY_IGNORE_EXPR(
|
||||
std::copy_n(reinterpret_cast<const uint8_t *>(current_data.c_str() + cur),
|
||||
data_field_size, buffer + sizeof(uint16_t)));
|
||||
|
||||
RAY_IGNORE_EXPR(
|
||||
pair.first->RunAsync("RAY.TABLE_DELETE", UniqueID::Nil(),
|
||||
reinterpret_cast<const uint8_t *>(send_data.c_str()),
|
||||
send_data.size(), prefix_, pubsub_channel_,
|
||||
/*redisCallback=*/nullptr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
void Log<ID, Data>::Delete(const JobID &job_id, const ID &id) {
|
||||
Delete(job_id, std::vector<ID>({id}));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
std::string Log<ID, Data>::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << "num lookups: " << num_lookups_ << ", num appends: " << num_appends_;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Add(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done) {
|
||||
num_adds_++;
|
||||
auto callback = [this, id, data, done](std::shared_ptr<CallbackReply> reply) {
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, *data);
|
||||
}
|
||||
};
|
||||
std::string str = data->SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync(GetTableAddCommand(command_type_), id, str.data(),
|
||||
str.length(), prefix_, pubsub_channel_,
|
||||
std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &lookup,
|
||||
const FailureCallback &failure) {
|
||||
num_lookups_++;
|
||||
return Log<ID, Data>::Lookup(job_id, id,
|
||||
[lookup, failure](RedisGcsClient *client, const ID &id,
|
||||
const std::vector<Data> &data) {
|
||||
if (data.empty()) {
|
||||
if (failure != nullptr) {
|
||||
(failure)(client, id);
|
||||
}
|
||||
} else {
|
||||
RAY_CHECK(data.size() == 1);
|
||||
if (lookup != nullptr) {
|
||||
(lookup)(client, id, data[0]);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const Callback &subscribe,
|
||||
const FailureCallback &failure,
|
||||
const SubscriptionCallback &done) {
|
||||
return Log<ID, Data>::Subscribe(
|
||||
job_id, node_id,
|
||||
[subscribe, failure](RedisGcsClient *client, const ID &id,
|
||||
const std::vector<Data> &data) {
|
||||
RAY_CHECK(data.empty() || data.size() == 1);
|
||||
if (data.size() == 1) {
|
||||
subscribe(client, id, data[0]);
|
||||
} else {
|
||||
if (failure != nullptr) {
|
||||
failure(client, id);
|
||||
}
|
||||
}
|
||||
},
|
||||
done);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const Callback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
return Subscribe(job_id, node_id, subscribe, /*failure*/ nullptr, done);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
std::string Table<ID, Data>::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Set<ID, Data>::Add(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data, const WriteCallback &done) {
|
||||
num_adds_++;
|
||||
auto callback = [this, id, data, done](std::shared_ptr<CallbackReply> reply) {
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, *data);
|
||||
}
|
||||
};
|
||||
std::string str = data->SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync("RAY.SET_ADD", id, str.data(), str.length(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Set<ID, Data>::Remove(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done) {
|
||||
num_removes_++;
|
||||
auto callback = [this, id, data, done](std::shared_ptr<CallbackReply> reply) {
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, *data);
|
||||
}
|
||||
};
|
||||
std::string str = data->SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync("RAY.SET_REMOVE", id, str.data(), str.length(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Set<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id,
|
||||
const GcsChangeMode change_mode,
|
||||
const std::vector<Data> &data) {
|
||||
ArrayNotification<Data> change_notification(change_mode, data);
|
||||
std::vector<ArrayNotification<Data>> notification_vec;
|
||||
notification_vec.emplace_back(std::move(change_notification));
|
||||
subscribe(client, id, notification_vec);
|
||||
};
|
||||
return Log<ID, Data>::Subscribe(job_id, node_id, on_subscribe, done);
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
std::string Set<ID, Data>::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_
|
||||
<< ", num removes: " << num_removes_;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Hash<ID, Data>::Update(const JobID &job_id, const ID &id, const DataMap &data_map,
|
||||
const HashCallback &done) {
|
||||
num_adds_++;
|
||||
auto callback = [this, id, data_map, done](std::shared_ptr<CallbackReply> reply) {
|
||||
if (done != nullptr) {
|
||||
(done)(client_, id, data_map);
|
||||
}
|
||||
};
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.set_id(id.Binary());
|
||||
gcs_entry.set_change_mode(GcsChangeMode::APPEND_OR_ADD);
|
||||
for (const auto &pair : data_map) {
|
||||
gcs_entry.add_entries(pair.first);
|
||||
gcs_entry.add_entries(pair.second->SerializeAsString());
|
||||
}
|
||||
std::string str = gcs_entry.SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, str.data(), str.size(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Hash<ID, Data>::RemoveEntries(const JobID &job_id, const ID &id,
|
||||
const std::vector<std::string> &keys,
|
||||
const HashRemoveCallback &remove_callback) {
|
||||
num_removes_++;
|
||||
auto callback = [this, id, keys,
|
||||
remove_callback](std::shared_ptr<CallbackReply> reply) {
|
||||
if (remove_callback != nullptr) {
|
||||
(remove_callback)(client_, id, keys);
|
||||
}
|
||||
};
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.set_id(id.Binary());
|
||||
gcs_entry.set_change_mode(GcsChangeMode::REMOVE);
|
||||
for (const auto &key : keys) {
|
||||
gcs_entry.add_entries(key);
|
||||
}
|
||||
std::string str = gcs_entry.SerializeAsString();
|
||||
return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, str.data(), str.size(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
std::string Hash<ID, Data>::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_
|
||||
<< ", num removes: " << num_removes_;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Hash<ID, Data>::Lookup(const JobID &job_id, const ID &id,
|
||||
const HashCallback &lookup) {
|
||||
num_lookups_++;
|
||||
auto callback = [this, id, lookup](std::shared_ptr<CallbackReply> reply) {
|
||||
if (lookup != nullptr) {
|
||||
DataMap results;
|
||||
if (!reply->IsNil()) {
|
||||
const auto data = reply->ReadAsString();
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.ParseFromString(reply->ReadAsString());
|
||||
RAY_CHECK(ID::FromBinary(gcs_entry.id()) == id);
|
||||
RAY_CHECK(gcs_entry.entries_size() % 2 == 0);
|
||||
for (int i = 0; i < gcs_entry.entries_size(); i += 2) {
|
||||
const auto &key = gcs_entry.entries(i);
|
||||
const auto value = std::make_shared<Data>();
|
||||
value->ParseFromString(gcs_entry.entries(i + 1));
|
||||
results.emplace(key, std::move(value));
|
||||
}
|
||||
}
|
||||
lookup(client_, id, results);
|
||||
}
|
||||
};
|
||||
std::vector<uint8_t> nil;
|
||||
return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(),
|
||||
prefix_, pubsub_channel_, std::move(callback));
|
||||
}
|
||||
|
||||
template <typename ID, typename Data>
|
||||
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) {
|
||||
RAY_CHECK(subscribe_callback_index_ == -1)
|
||||
<< "Client called Subscribe twice on the same table";
|
||||
auto callback = [this, subscribe, done](std::shared_ptr<CallbackReply> reply) {
|
||||
const auto data = reply->ReadAsPubsubData();
|
||||
if (data.empty()) {
|
||||
// No notification data is provided. This is the callback for the
|
||||
// initial subscription request.
|
||||
if (done != nullptr) {
|
||||
done(client_);
|
||||
}
|
||||
} else {
|
||||
// Data is provided. This is the callback for a message.
|
||||
if (subscribe != nullptr) {
|
||||
// Parse the notification.
|
||||
GcsEntry gcs_entry;
|
||||
gcs_entry.ParseFromString(data);
|
||||
ID id = ID::FromBinary(gcs_entry.id());
|
||||
DataMap data_map;
|
||||
if (gcs_entry.change_mode() == GcsChangeMode::REMOVE) {
|
||||
for (const auto &key : gcs_entry.entries()) {
|
||||
data_map.emplace(key, std::shared_ptr<Data>());
|
||||
}
|
||||
} else {
|
||||
RAY_CHECK(gcs_entry.entries_size() % 2 == 0);
|
||||
for (int i = 0; i < gcs_entry.entries_size(); i += 2) {
|
||||
const auto &key = gcs_entry.entries(i);
|
||||
const auto value = std::make_shared<Data>();
|
||||
value->ParseFromString(gcs_entry.entries(i + 1));
|
||||
data_map.emplace(key, std::move(value));
|
||||
}
|
||||
}
|
||||
MapNotification<std::string, Data> notification(gcs_entry.change_mode(),
|
||||
data_map);
|
||||
std::vector<MapNotification<std::string, Data>> notification_vec;
|
||||
notification_vec.emplace_back(std::move(notification));
|
||||
subscribe(client_, id, notification_vec);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
subscribe_callback_index_ = 1;
|
||||
for (auto &context : shard_contexts_) {
|
||||
RAY_RETURN_NOT_OK(context->SubscribeAsync(node_id, pubsub_channel_, callback,
|
||||
&subscribe_callback_index_));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::string ProfileTable::DebugString() const {
|
||||
return Log<UniqueID, ProfileTableData>::DebugString();
|
||||
}
|
||||
|
||||
void NodeTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback) {
|
||||
RAY_CHECK(node_change_callback_ == nullptr);
|
||||
node_change_callback_ = callback;
|
||||
// Call the callback for any added clients that are cached.
|
||||
for (const auto &entry : node_cache_) {
|
||||
if (!entry.first.IsNil()) {
|
||||
RAY_CHECK(entry.second.state() == GcsNodeInfo::ALIVE ||
|
||||
entry.second.state() == GcsNodeInfo::DEAD);
|
||||
node_change_callback_(entry.first, entry.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies a node notification to the local cache and notifies the registered
/// callback when the notification carries new information.
///
/// A notification is new when the node is not cached yet, or when the cached
/// node was alive and the notification reports it as not alive.
///
/// \param client Not used by this implementation.
/// \param node_info Node state carried by the notification.
void NodeTable::HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info) {
  const NodeID node_id = NodeID::FromBinary(node_info.node_id());
  const bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);

  // It's possible to get duplicate notifications from the client table, so
  // check whether this notification is new. An uncached node is always new.
  bool is_notif_new = true;
  const auto cached = node_cache_.find(node_id);
  if (cached != node_cache_.end()) {
    const bool was_alive = (cached->second.state() == GcsNodeInfo::ALIVE);
    // For a cached node only the alive -> not alive transition is new.
    is_notif_new = was_alive && !is_alive;
    // Once a node with a given ID has been removed, it should never be added
    // again. If the entry was in the cache and the node was deleted, check
    // that this new notification is not an insertion.
    if (!was_alive) {
      RAY_CHECK(!is_alive)
          << "Notification for addition of a node that was already removed:" << node_id;
    }
  }

  // Add the notification to our cache. Notifications are idempotent.
  RAY_LOG(DEBUG) << "[NodeTableNotification] NodeTable Insertion/Deletion "
                    "notification for node id "
                 << node_id << ". IsAlive: " << is_alive
                 << ". Setting the node cache to data.";
  GcsNodeInfo &cache_data = node_cache_[node_id];
  cache_data = node_info;

  // Nothing more to do for duplicates.
  if (!is_notif_new) {
    return;
  }

  if (is_alive) {
    RAY_CHECK(removed_nodes_.find(node_id) == removed_nodes_.end());
  } else {
    // NOTE(swang): The node should be added to this data structure before
    // the callback gets called, in case the callback depends on the data
    // structure getting updated.
    removed_nodes_.insert(node_id);
  }
  if (node_change_callback_ != nullptr) {
    node_change_callback_(node_id, cache_data);
  }
}
|
||||
|
||||
/// Return the ID of the local node.
///
/// Aborts via RAY_CHECK if the local node ID is still nil.
const NodeID &NodeTable::GetLocalNodeId() const {
  const bool has_local_id = !local_node_id_.IsNil();
  RAY_CHECK(has_local_id);
  return local_node_id_;
}
|
||||
|
||||
/// Return the locally stored information about this node.
const GcsNodeInfo &NodeTable::GetLocalNode() const {
  return local_node_info_;
}
|
||||
|
||||
bool NodeTable::IsRemoved(const NodeID &node_id) const {
|
||||
return removed_nodes_.count(node_id) == 1;
|
||||
}
|
||||
|
||||
/// Register the local node by synchronously appending its info to the node
/// log. The local node ID and info are stored only if the append succeeds.
///
/// \param local_node_info Information about the local node; its state must
/// be ALIVE.
/// \return Status of the synchronous append.
Status NodeTable::Connect(const GcsNodeInfo &local_node_info) {
  RAY_CHECK(!disconnected_) << "Tried to reconnect a disconnected node.";
  RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected.";
  RAY_CHECK(local_node_info.state() == GcsNodeInfo::ALIVE);

  const Status append_status = SyncAppend(
      JobID::Nil(), node_log_key_, std::make_shared<GcsNodeInfo>(local_node_info));
  if (!append_status.ok()) {
    return append_status;
  }

  // The entry was written, so remember who we are.
  local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
  local_node_info_ = local_node_info;
  return append_status;
}
|
||||
|
||||
/// Mark the local node as dead by synchronously appending a DEAD entry for it
/// to the node log.
///
/// The locally cached node info and the `disconnected_` flag are updated only
/// after the append succeeds, mirroring `Connect`. A failed append therefore
/// leaves the local state untouched.
///
/// \return Status of the synchronous append.
Status NodeTable::Disconnect() {
  // Build the deletion entry from a copy so a failed write cannot leave the
  // cached local state claiming the node is already dead.
  auto node_info_ptr = std::make_shared<GcsNodeInfo>(local_node_info_);
  node_info_ptr->set_state(GcsNodeInfo::DEAD);
  Status status = SyncAppend(JobID::Nil(), node_log_key_, node_info_ptr);

  if (status.ok()) {
    // We successfully added the deletion entry. Mark ourselves as disconnected.
    local_node_info_.set_state(GcsNodeInfo::DEAD);
    disconnected_ = true;
  }
  return status;
}
|
||||
|
||||
/// Asynchronously append an entry for an alive node to the node log.
///
/// \param node_info Information about the node; its state must be ALIVE.
/// \param done Callback passed through to `Append`.
/// \return Status returned by `Append`.
ray::Status NodeTable::MarkConnected(const GcsNodeInfo &node_info,
                                     const WriteCallback &done) {
  const bool node_is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
  RAY_CHECK(node_is_alive);
  auto entry = std::make_shared<GcsNodeInfo>(node_info);
  return Append(JobID::Nil(), node_log_key_, entry, done);
}
|
||||
|
||||
/// Asynchronously append a DEAD entry for the given node to the node log.
///
/// \param dead_node_id The ID of the node to mark as dead.
/// \param done Callback passed through to `Append`.
/// \return Status returned by `Append`.
ray::Status NodeTable::MarkDisconnected(const NodeID &dead_node_id,
                                        const WriteCallback &done) {
  // The entry carries only the node ID and the DEAD state.
  auto dead_entry = std::make_shared<GcsNodeInfo>();
  dead_entry->set_node_id(dead_node_id.Binary());
  dead_entry->set_state(GcsNodeInfo::DEAD);
  return Append(JobID::Nil(), node_log_key_, dead_entry, done);
}
|
||||
|
||||
/// Subscribe to node additions and removals recorded in the node log.
///
/// \param subscribe Callback registered as the node-change callback once the
/// notification request has completed.
/// \param done Optional callback invoked with the status of the notification
/// request. It runs before `subscribe` is registered.
/// \return Status returned by the underlying `Subscribe` call.
ray::Status NodeTable::SubscribeToNodeChange(
    const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
  // Callback for a notification from the node table.
  auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key,
                             const std::vector<GcsNodeInfo> &notifications) {
    RAY_CHECK(log_key == node_log_key_);
    std::unordered_map<std::string, GcsNodeInfo> connected_nodes;
    std::unordered_map<std::string, GcsNodeInfo> disconnected_nodes;
    for (const auto &notification : notifications) {
      // This is temporary fix for Issue 4140 to avoid connect to dead nodes.
      // TODO(yuhguo): remove this temporary fix after GCS entry is removable.
      if (notification.state() == GcsNodeInfo::ALIVE) {
        connected_nodes.emplace(notification.node_id(), notification);
      } else {
        // A node reported dead in this batch must not also be handled as
        // connected. Erasing by key is a no-op if it was never added.
        connected_nodes.erase(notification.node_id());
        disconnected_nodes.emplace(notification.node_id(), notification);
      }
    }
    // Connected nodes are handled before disconnected ones.
    for (const auto &pair : connected_nodes) {
      HandleNotification(client, pair.second);
    }
    for (const auto &pair : disconnected_nodes) {
      HandleNotification(client, pair.second);
    }
  };

  // Callback to request notifications from the node table once we've
  // successfully subscribed.
  auto on_done = [this, subscribe, done](RedisGcsClient *client) {
    auto on_request_notification_done = [this, subscribe, done](Status status) {
      RAY_CHECK_OK(status);
      if (done != nullptr) {
        done(status);
      }
      // Register node change callbacks after RequestNotification finishes.
      RegisterNodeChangeCallback(subscribe);
    };
    RAY_CHECK_OK(RequestNotifications(JobID::Nil(), node_log_key_, subscribe_id_,
                                      on_request_notification_done));
  };

  // Subscribe to the node table.
  return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done);
}
|
||||
|
||||
bool NodeTable::GetNode(const NodeID &node_id, GcsNodeInfo *node_info) const {
|
||||
RAY_CHECK(!node_id.IsNil());
|
||||
auto entry = node_cache_.find(node_id);
|
||||
auto found = (entry != node_cache_.end());
|
||||
if (found) {
|
||||
*node_info = entry->second;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &NodeTable::GetAllNodes() const {
|
||||
return node_cache_;
|
||||
}
|
||||
|
||||
/// Look up the node log asynchronously.
///
/// \param lookup Callback passed through to `Log::Lookup`; must not be null.
/// \return Status returned by `Log::Lookup`.
Status NodeTable::Lookup(const Callback &lookup) {
  const bool has_callback = (lookup != nullptr);
  RAY_CHECK(has_callback);
  return Log::Lookup(JobID::Nil(), node_log_key_, lookup);
}
|
||||
|
||||
std::string NodeTable::DebugString() const {
|
||||
std::stringstream result;
|
||||
result << Log<NodeID, GcsNodeInfo>::DebugString();
|
||||
result << ", cache size: " << node_cache_.size()
|
||||
<< ", num removed: " << removed_nodes_.size();
|
||||
return result.str();
|
||||
}
|
||||
|
||||
/// Subscribe to task lease updates, adapting the underlying table callback to
/// one that receives a vector of optional lease entries.
///
/// \param job_id The ID of the job.
/// \param node_id Passed through to `Table::Subscribe`.
/// \param subscribe Callback invoked with the received entries. When no
/// entries were received it gets a single empty optional instead.
/// \param done Passed through to `Table::Subscribe`.
/// \return Status returned by `Table::Subscribe`.
Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &node_id,
                                 const Callback &subscribe,
                                 const SubscriptionCallback &done) {
  auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id,
                                  const std::vector<TaskLeaseData> &data) {
    // Wrap every received entry in an optional.
    std::vector<boost::optional<TaskLeaseData>> wrapped(data.begin(), data.end());
    if (wrapped.empty()) {
      // No data at the key: report that with one disengaged optional.
      wrapped.emplace_back();
    }
    subscribe(client, task_id, wrapped);
  };
  return Table<TaskID, TaskLeaseData>::Subscribe(job_id, node_id, on_subscribe, done);
}
|
||||
|
||||
/// Collect the IDs of all actors stored under `table_prefix` by iterating the
/// Redis keyspace with SCAN on a synchronous connection.
///
/// \param redis_context The synchronous hiredis connection to issue SCAN on.
/// \param table_prefix The key prefix of the actor table.
/// \return The de-duplicated actor IDs, in unspecified order.
std::vector<ActorID> SyncGetAllActorID(redisContext *redis_context,
                                       const std::string &table_prefix) {
  std::unordered_set<ActorID> actor_id_set;
  // SCAN cursors are unsigned 64-bit values, so keep the cursor in a type that
  // holds the full range and matches the `%llu` conversion below.
  unsigned long long cursor = 0;
  do {
    auto r = redisCommand(redis_context, "SCAN %llu match %s* count 100", cursor,
                          table_prefix.c_str());
    auto reply = reinterpret_cast<redisReply *>(r);
    RAY_CHECK(reply != nullptr && reply->type == REDIS_REPLY_ARRAY);
    RAY_CHECK(reply->elements == 2);

    // current cursor
    redisReply *cursor_reply = reply->element[0];
    RAY_CHECK(cursor_reply != nullptr && cursor_reply->type == REDIS_REPLY_STRING);
    cursor = std::stoull(std::string(cursor_reply->str, cursor_reply->len));

    // actor ids
    redisReply *array_reply = reply->element[1];
    RAY_CHECK(array_reply != nullptr && array_reply->type == REDIS_REPLY_ARRAY);
    for (size_t i = 0; i < array_reply->elements; ++i) {
      redisReply *id_reply = array_reply->element[i];
      RAY_CHECK(id_reply != nullptr && id_reply->type == REDIS_REPLY_STRING);
      auto id_with_prefix = std::string(id_reply->str, id_reply->len);
      // The key of actor_checkpoint table and actor_checkpoint_id table have the same
      // prefix of `ACTOR`, so we should check the length of the key to filter them.
      if (id_with_prefix.size() == table_prefix.size() + ActorID::Size()) {
        auto id = ActorID::FromBinary(id_with_prefix.substr(table_prefix.size()));
        actor_id_set.emplace(id);
      }
    }

    // The caller owns the reply, and everything needed has been copied out of
    // it. Release it so each SCAN iteration does not leak.
    freeReplyObject(reply);
  } while (cursor != 0);

  return std::vector<ActorID>(actor_id_set.begin(), actor_id_set.end());
}
|
||||
|
||||
std::vector<ActorID> LogBasedActorTable::GetAllActorID() {
|
||||
auto redis_context = client_->primary_context()->sync_context();
|
||||
return SyncGetAllActorID(redis_context, TablePrefix_Name(prefix_));
|
||||
}
|
||||
|
||||
/// Synchronously fetch the most recent log entry for an actor.
///
/// \param actor_id The ID of the actor to fetch.
/// \param actor_table_data Output parameter that receives the parsed entry;
/// must not be null.
/// \return Status::OK on success, or an IOError if the key has no data or the
/// stored entry cannot be parsed.
Status LogBasedActorTable::Get(const ray::ActorID &actor_id,
                               ray::rpc::ActorTableData *actor_table_data) {
  RAY_CHECK(actor_table_data != nullptr);
  auto key = TablePrefix_Name(prefix_) + actor_id.Binary();
  // LRANGE -1 -1 returns only the last element of the list at `key`.
  auto reply = GetRedisContext(actor_id)->RunArgvSync({"LRANGE", key, "-1", "-1"});
  if (!reply || reply->IsNil()) {
    return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex());
  }

  const auto &data_list = reply->ReadAsStringArray();
  if (data_list.empty()) {
    return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex());
  }

  RAY_CHECK(data_list.size() == 1);
  // Report a failed parse to the caller instead of returning OK with data
  // that could not be decoded.
  if (!actor_table_data->ParseFromString(data_list.front())) {
    return Status::IOError("Failed to parse actor data for actor_id " + actor_id.Hex());
  }
  return Status::OK();
}
|
||||
|
||||
std::vector<ActorID> ActorTable::GetAllActorID() {
|
||||
auto redis_context = client_->primary_context()->sync_context();
|
||||
return SyncGetAllActorID(redis_context, TablePrefix_Name(prefix_));
|
||||
}
|
||||
|
||||
/// Synchronously fetch the stored data for an actor.
///
/// \param actor_id The ID of the actor to fetch.
/// \param actor_table_data Output parameter that receives the parsed data;
/// must not be null.
/// \return Status::OK on success, or an IOError if the key has no data or the
/// stored value cannot be parsed.
Status ActorTable::Get(const ray::ActorID &actor_id,
                       ray::rpc::ActorTableData *actor_table_data) {
  RAY_CHECK(actor_table_data != nullptr);
  auto key = TablePrefix_Name(prefix_) + actor_id.Binary();
  auto reply = GetRedisContext(actor_id)->RunArgvSync({"GET", key});
  if (!reply || reply->IsNil()) {
    return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex());
  }
  // Report a failed parse to the caller instead of returning OK with data
  // that could not be decoded.
  if (!actor_table_data->ParseFromString(reply->ReadAsString())) {
    return Status::IOError("Failed to parse actor data for actor_id " + actor_id.Hex());
  }
  return Status::OK();
}
|
||||
|
||||
template class Log<ObjectID, ObjectTableData>;
|
||||
template class Set<ObjectID, ObjectTableData>;
|
||||
template class Log<TaskID, TaskTableData>;
|
||||
template class Table<TaskID, TaskTableData>;
|
||||
template class Log<ActorID, ActorTableData>;
|
||||
template class Log<TaskID, TaskReconstructionData>;
|
||||
template class Table<TaskID, TaskLeaseData>;
|
||||
template class Table<NodeID, HeartbeatTableData>;
|
||||
template class Table<NodeID, ResourceUsageBatchData>;
|
||||
template class Log<NodeID, GcsNodeInfo>;
|
||||
template class Log<JobID, JobTableData>;
|
||||
template class Log<UniqueID, ProfileTableData>;
|
||||
template class Log<NodeID, HeartbeatTableData>;
|
||||
template class Log<NodeID, ResourceUsageBatchData>;
|
||||
template class Log<WorkerID, WorkerTableData>;
|
||||
template class Table<WorkerID, WorkerTableData>;
|
||||
template class Table<ActorID, ActorTableData>;
|
||||
|
||||
template class Log<NodeID, ResourceTableData>;
|
||||
template class Hash<NodeID, ResourceTableData>;
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,978 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "ray/common/constants.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/gcs/callback.h"
|
||||
#include "ray/gcs/entry_change_notification.h"
|
||||
#include "ray/gcs/redis_context.h"
|
||||
#include "ray/util/logging.h"
|
||||
#include "src/ray/protobuf/gcs.pb.h"
|
||||
|
||||
struct redisAsyncContext;
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
using rpc::ActorTableData;
|
||||
using rpc::ErrorTableData;
|
||||
using rpc::GcsChangeMode;
|
||||
using rpc::GcsEntry;
|
||||
using rpc::GcsNodeInfo;
|
||||
using rpc::HeartbeatTableData;
|
||||
using rpc::JobTableData;
|
||||
using rpc::ObjectTableData;
|
||||
using rpc::ProfileTableData;
|
||||
using rpc::ResourceTableData;
|
||||
using rpc::ResourceUsageBatchData;
|
||||
using rpc::TablePrefix;
|
||||
using rpc::TablePubsub;
|
||||
using rpc::TaskLeaseData;
|
||||
using rpc::TaskReconstructionData;
|
||||
using rpc::TaskTableData;
|
||||
using rpc::WorkerTableData;
|
||||
|
||||
class RedisContext;
|
||||
|
||||
class RedisGcsClient;
|
||||
|
||||
/// Selects how commands issued to a table are sent: as regular commands or,
/// when available, as chain-replicated commands.
enum class CommandType {
  kRegular,
  kChain,
  kUnknown,
};
|
||||
|
||||
/// \class PubsubInterface
|
||||
///
|
||||
/// The interface for a pubsub storage system. The client of a storage system
|
||||
/// that implements this interface can request and cancel notifications for
|
||||
/// specific keys.
|
||||
template <typename ID>
|
||||
class PubsubInterface {
|
||||
public:
|
||||
virtual Status RequestNotifications(const JobID &job_id, const ID &id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &done) = 0;
|
||||
virtual Status CancelNotifications(const JobID &job_id, const ID &id,
|
||||
const NodeID &node_id,
|
||||
const StatusCallback &done) = 0;
|
||||
virtual ~PubsubInterface(){};
|
||||
};
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class LogInterface {
|
||||
public:
|
||||
using WriteCallback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id, const Data &data)>;
|
||||
virtual Status Append(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;
|
||||
virtual Status AppendAt(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data, const WriteCallback &done,
|
||||
const WriteCallback &failure, int log_length) = 0;
|
||||
virtual ~LogInterface(){};
|
||||
};
|
||||
|
||||
/// \class Log
|
||||
///
|
||||
/// A GCS table where every entry is an append-only log. This class is not
|
||||
/// meant to be used directly. All log classes should derive from this class
|
||||
/// and override the prefix_ member with a unique prefix for that log, and the
|
||||
/// pubsub_channel_ member if pubsub is required.
|
||||
///
|
||||
/// Example tables backed by Log:
|
||||
/// NodeTable: Stores a log of which GCS clients have been added or deleted
|
||||
/// from the system.
|
||||
template <typename ID, typename Data>
|
||||
class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
|
||||
public:
|
||||
using Callback = std::function<void(RedisGcsClient *client, const ID &id,
|
||||
const std::vector<Data> &data)>;
|
||||
|
||||
using NotificationCallback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id,
|
||||
const GcsChangeMode change_mode, const std::vector<Data> &data)>;
|
||||
|
||||
/// The callback to call when a write to a key succeeds.
|
||||
using WriteCallback = typename LogInterface<ID, Data>::WriteCallback;
|
||||
/// The callback to call when a SUBSCRIBE call completes and we are ready to
|
||||
/// request and receive notifications.
|
||||
using SubscriptionCallback = std::function<void(RedisGcsClient *client)>;
|
||||
|
||||
struct CallbackData {
|
||||
ID id;
|
||||
std::shared_ptr<Data> data;
|
||||
Callback callback;
|
||||
// An optional callback to call for subscription operations, where the
|
||||
// first message is a notification of subscription success.
|
||||
SubscriptionCallback subscription_callback;
|
||||
Log<ID, Data> *log;
|
||||
RedisGcsClient *client;
|
||||
};
|
||||
|
||||
Log(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
|
||||
: shard_contexts_(contexts),
|
||||
client_(client),
|
||||
pubsub_channel_(TablePubsub::NO_PUBLISH),
|
||||
prefix_(TablePrefix::UNUSED),
|
||||
subscribe_callback_index_(-1){};
|
||||
|
||||
/// Append a log entry to a key.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param data Data to append to the log. TODO(rkn): This can be made const,
|
||||
/// right?
|
||||
/// \param done Callback that is called once the data has been written to the
|
||||
/// GCS.
|
||||
/// \return Status
|
||||
Status Append(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done);
|
||||
|
||||
/// Append a log entry to a key synchronously.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param data Data to append to the log.
|
||||
/// \return Status
|
||||
Status SyncAppend(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data);
|
||||
|
||||
/// Append a log entry to a key if and only if the log has the given number
|
||||
/// of entries.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param data Data to append to the log.
|
||||
/// \param done Callback that is called if the data was appended to the log.
|
||||
/// \param failure Callback that is called if the data was not appended to
|
||||
/// the log because the log length did not match the given `log_length`.
|
||||
/// \param log_length The number of entries that the log must have for the
|
||||
/// append to succeed.
|
||||
/// \return Status
|
||||
Status AppendAt(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done, const WriteCallback &failure,
|
||||
int log_length);
|
||||
|
||||
/// Lookup the log values at a key asynchronously.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is looked up in the GCS.
|
||||
/// \param lookup Callback that is called after lookup. If the callback is
|
||||
/// called with an empty vector, then there was no data at the key.
|
||||
/// \return Status
|
||||
Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup);
|
||||
|
||||
/// Subscribe to any Append operations to this table. The caller may choose
|
||||
/// requests notifications for. This may only be called once per Log
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each Add to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe Callback that is called on each received message. If the
|
||||
/// callback is called with an empty vector, then there was no data at the key.
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
/// Request notifications about a key in this table.
|
||||
///
|
||||
/// The notifications will be returned via the subscribe callback that was
|
||||
/// registered by `Subscribe`. An initial notification will be returned for
|
||||
/// the current values at the key, if any, and a subsequent notification will
|
||||
/// be published for every following `Append` to the key. Before
|
||||
/// notifications can be requested, the caller must first call `Subscribe`,
|
||||
/// with the same `node_id`.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the key to request notifications for.
|
||||
/// \param node_id The node who is requesting notifications.
|
||||
/// \param done Callback that is called when request notifications is complete.
|
||||
/// notifications can be requested, a call to `Subscribe` to this
|
||||
/// table with the same `node_id` must complete successfully.
|
||||
/// \return Status
|
||||
Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &node_id,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Cancel notifications about a key in this table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the key to request notifications for.
|
||||
/// \param node_id The node who originally requested notifications.
|
||||
/// \param done Callback that is called when cancel notifications is complete.
|
||||
/// \return Status
|
||||
Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &node_id,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Subscribe to any modifications to the key. The caller may choose
|
||||
/// to subscribe to all modifications, or to subscribe only to keys that it
|
||||
/// requests notifications for. This may only be called once per Log
|
||||
/// instance. This function is different from public version due to
|
||||
/// an additional parameter change_mode in NotificationCallback. Therefore this
|
||||
/// function supports notifications of remove operations.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each Add to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe Callback that is called on each received message. If the
|
||||
/// callback is called with an empty vector, then there was no data at the key.
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
/// Delete an entire key from redis.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data to delete from the GCS.
|
||||
/// \return Void.
|
||||
void Delete(const JobID &job_id, const ID &id);
|
||||
|
||||
/// Delete several keys from redis.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param ids The vector of IDs to delete from the GCS.
|
||||
/// \return Void.
|
||||
void Delete(const JobID &job_id, const std::vector<ID> &ids);
|
||||
|
||||
/// Returns debug string for class.
|
||||
///
|
||||
/// \return string.
|
||||
std::string DebugString() const;
|
||||
|
||||
protected:
|
||||
std::shared_ptr<RedisContext> GetRedisContext(const ID &id) {
|
||||
static std::hash<ID> index;
|
||||
return shard_contexts_[index(id) % shard_contexts_.size()];
|
||||
}
|
||||
|
||||
/// The connection to the GCS.
|
||||
std::vector<std::shared_ptr<RedisContext>> shard_contexts_;
|
||||
/// The GCS client.
|
||||
RedisGcsClient *client_;
|
||||
/// The pubsub channel to subscribe to for notifications about keys in this
|
||||
/// table. If no notifications are required, this should be set to
|
||||
/// TablePubsub_NO_PUBLISH. If notifications are required, then this must be
|
||||
/// unique across all instances of Log.
|
||||
TablePubsub pubsub_channel_;
|
||||
/// The prefix to use for keys in this table. This must be unique across all
|
||||
/// instances of Log.
|
||||
TablePrefix prefix_;
|
||||
/// The index in the RedisCallbackManager for the callback that is called
|
||||
/// when we receive notifications. This is >= 0 iff we have subscribed to the
|
||||
/// table, otherwise -1.
|
||||
int64_t subscribe_callback_index_;
|
||||
|
||||
/// Commands to a GCS table can either be regular (default) or chain-replicated.
|
||||
CommandType command_type_ = CommandType::kRegular;
|
||||
|
||||
int64_t num_appends_ = 0;
|
||||
int64_t num_lookups_ = 0;
|
||||
};
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class TableInterface {
|
||||
public:
|
||||
using WriteCallback = typename Log<ID, Data>::WriteCallback;
|
||||
virtual Status Add(const JobID &job_id, const ID &task_id,
|
||||
const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;
|
||||
virtual ~TableInterface(){};
|
||||
};
|
||||
|
||||
/// \class Table
|
||||
///
|
||||
/// A GCS table where every entry is a single data item. This class is not
|
||||
/// meant to be used directly. All table classes should derive from this class
|
||||
/// and override the prefix_ member with a unique prefix for that table, and
|
||||
/// the pubsub_channel_ member if pubsub is required.
|
||||
///
|
||||
/// Example tables backed by Log:
|
||||
/// TaskTable: Stores Task metadata needed for executing the task.
|
||||
template <typename ID, typename Data>
|
||||
class Table : private Log<ID, Data>,
|
||||
public TableInterface<ID, Data>,
|
||||
virtual public PubsubInterface<ID> {
|
||||
public:
|
||||
using Callback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id, const Data &data)>;
|
||||
using WriteCallback = typename Log<ID, Data>::WriteCallback;
|
||||
/// The callback to call when a Lookup call returns an empty entry.
|
||||
using FailureCallback = std::function<void(RedisGcsClient *client, const ID &id)>;
|
||||
/// The callback to call when a Subscribe call completes and we are ready to
|
||||
/// request and receive notifications.
|
||||
using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;
|
||||
|
||||
Table(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
: Log<ID, Data>(contexts, client) {}
|
||||
|
||||
using Log<ID, Data>::RequestNotifications;
|
||||
using Log<ID, Data>::CancelNotifications;
|
||||
/// Expose this interface for use by subscription tools class SubscriptionExecutor.
|
||||
/// In this way TaskTable() can also reuse class SubscriptionExecutor.
|
||||
using Log<ID, Data>::Subscribe;
|
||||
|
||||
/// Add an entry to the table. This overwrites any existing data at the key.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param data Data that is added to the GCS.
|
||||
/// \param done Callback that is called once the data has been written to the
|
||||
/// GCS.
|
||||
/// \return Status
|
||||
Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done);
|
||||
|
||||
/// Lookup an entry asynchronously.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is looked up in the GCS.
|
||||
/// \param lookup Callback that is called after lookup if there was data the
|
||||
/// key.
|
||||
/// \param failure Callback that is called after lookup if there was no data
|
||||
/// at the key.
|
||||
/// \return Status
|
||||
Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup,
|
||||
const FailureCallback &failure);
|
||||
|
||||
/// Subscribe to any Add operations to this table. The caller may choose to
|
||||
/// subscribe to all Adds, or to subscribe only to keys that it requests
|
||||
/// notifications for. This may only be called once per Table instance.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each Add to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe Callback that is called on each received message. If the
|
||||
/// callback is called with an empty vector, then there was no data at the key.
|
||||
/// \param failure Callback that is called if the key is empty at the time
|
||||
/// that notifications are requested.
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
|
||||
const FailureCallback &failure, const SubscriptionCallback &done);
|
||||
|
||||
/// Subscribe to any Add operations to this table. The caller may choose to
|
||||
/// subscribe to all Adds, or to subscribe only to keys that it requests
|
||||
/// notifications for. This may only be called once per Table instance.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each Add to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe Callback that is called on each received message. If the
|
||||
/// callback is called with an empty vector, then there was no data at the key.
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
void Delete(const JobID &job_id, const ID &id) { Log<ID, Data>::Delete(job_id, id); }
|
||||
|
||||
void Delete(const JobID &job_id, const std::vector<ID> &ids) {
|
||||
Log<ID, Data>::Delete(job_id, ids);
|
||||
}
|
||||
|
||||
/// Returns debug string for class.
|
||||
///
|
||||
/// \return string.
|
||||
std::string DebugString() const;
|
||||
|
||||
protected:
|
||||
using Log<ID, Data>::shard_contexts_;
|
||||
using Log<ID, Data>::client_;
|
||||
using Log<ID, Data>::pubsub_channel_;
|
||||
using Log<ID, Data>::prefix_;
|
||||
using Log<ID, Data>::command_type_;
|
||||
using Log<ID, Data>::GetRedisContext;
|
||||
|
||||
int64_t num_adds_ = 0;
|
||||
int64_t num_lookups_ = 0;
|
||||
};
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class SetInterface {
|
||||
public:
|
||||
using WriteCallback = typename Log<ID, Data>::WriteCallback;
|
||||
virtual Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done) = 0;
|
||||
virtual Status Remove(const JobID &job_id, const ID &id,
|
||||
const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;
|
||||
virtual ~SetInterface(){};
|
||||
};
|
||||
|
||||
/// \class Set
|
||||
///
|
||||
/// A GCS table where every entry is an addable & removable set. This class is not
|
||||
/// meant to be used directly. All set classes should derive from this class
|
||||
/// and override the prefix_ member with a unique prefix for that set, and the
|
||||
/// pubsub_channel_ member if pubsub is required.
|
||||
///
|
||||
/// Example tables backed by Set:
|
||||
/// ObjectTable: Stores a set of which clients have added an object.
|
||||
template <typename ID, typename Data>
|
||||
class Set : private Log<ID, Data>,
|
||||
public SetInterface<ID, Data>,
|
||||
virtual public PubsubInterface<ID> {
|
||||
public:
|
||||
using Callback = typename Log<ID, Data>::Callback;
|
||||
using WriteCallback = typename Log<ID, Data>::WriteCallback;
|
||||
using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;
|
||||
|
||||
Set(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
|
||||
: Log<ID, Data>(contexts, client) {}
|
||||
|
||||
using Log<ID, Data>::RequestNotifications;
|
||||
using Log<ID, Data>::CancelNotifications;
|
||||
using Log<ID, Data>::Lookup;
|
||||
using Log<ID, Data>::Delete;
|
||||
|
||||
/// Add an entry to the set.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param data Data to add to the set.
|
||||
/// \param done Callback that is called once the data has been written to the
|
||||
/// GCS.
|
||||
/// \return Status
|
||||
Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done);
|
||||
|
||||
/// Remove an entry from the set.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is removed from the GCS.
|
||||
/// \param data Data to remove from the set.
|
||||
/// \param done Callback that is called once the data has been written to the
|
||||
/// GCS.
|
||||
/// \return Status
|
||||
Status Remove(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
|
||||
const WriteCallback &done);
|
||||
|
||||
using NotificationCallback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id,
|
||||
const std::vector<ArrayNotification<Data>> &data)>;
|
||||
/// Subscribe to any add or remove operations to this table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each add or remove to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe Callback that is called on each received message.
|
||||
/// \param done Callback that is called when subscription is complete and we
|
||||
/// are ready to receive messages.
|
||||
/// \return Status
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const NotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done);
|
||||
|
||||
/// Returns debug string for class.
|
||||
///
|
||||
/// \return string.
|
||||
std::string DebugString() const;
|
||||
|
||||
protected:
|
||||
using Log<ID, Data>::shard_contexts_;
|
||||
using Log<ID, Data>::client_;
|
||||
using Log<ID, Data>::pubsub_channel_;
|
||||
using Log<ID, Data>::prefix_;
|
||||
using Log<ID, Data>::GetRedisContext;
|
||||
|
||||
int64_t num_adds_ = 0;
|
||||
int64_t num_removes_ = 0;
|
||||
using Log<ID, Data>::num_lookups_;
|
||||
};
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class HashInterface {
|
||||
public:
|
||||
using DataMap = std::unordered_map<std::string, std::shared_ptr<Data>>;
|
||||
// Reuse Log's SubscriptionCallback when Subscribe is successfully called.
|
||||
using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;
|
||||
|
||||
/// The callback function used by function Update & Lookup.
|
||||
///
|
||||
/// \param client The client on which the RemoveEntries is called.
|
||||
/// \param id The ID of the Hash Table whose entries are removed.
|
||||
/// \param data Map data contains the change to the Hash Table.
|
||||
/// \return Void
|
||||
using HashCallback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id, const DataMap &pairs)>;
|
||||
|
||||
/// The callback function used by function RemoveEntries.
|
||||
///
|
||||
/// \param client The client on which the RemoveEntries is called.
|
||||
/// \param id The ID of the Hash Table whose entries are removed.
|
||||
/// \param keys The keys that are moved from this Hash Table.
|
||||
/// \return Void
|
||||
using HashRemoveCallback = std::function<void(RedisGcsClient *client, const ID &id,
|
||||
const std::vector<std::string> &keys)>;
|
||||
|
||||
/// The notification function used by function Subscribe.
|
||||
///
|
||||
/// \param client The client on which the Subscribe is called.
|
||||
/// \param change_mode The mode to identify the data is removed or updated.
|
||||
/// \param data Map data contains the change to the Hash Table.
|
||||
/// \return Void
|
||||
using HashNotificationCallback =
|
||||
std::function<void(RedisGcsClient *client, const ID &id,
|
||||
const std::vector<MapNotification<std::string, Data>> &data)>;
|
||||
|
||||
/// Add entries of a hash table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is added to the GCS.
|
||||
/// \param pairs Map data to add to the hash table.
|
||||
/// \param done HashCallback that is called once the request data has been written to
|
||||
/// the GCS.
|
||||
/// \return Status
|
||||
virtual Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
|
||||
const HashCallback &done) = 0;
|
||||
|
||||
/// Remove entries from the hash table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is removed from the GCS.
|
||||
/// \param keys The entry keys of the hash table.
|
||||
/// \param remove_callback HashRemoveCallback that is called once the data has been
|
||||
/// written to the GCS no matter whether the key exists in the hash table.
|
||||
/// \return Status
|
||||
virtual Status RemoveEntries(const JobID &job_id, const ID &id,
|
||||
const std::vector<std::string> &keys,
|
||||
const HashRemoveCallback &remove_callback) = 0;
|
||||
|
||||
/// Lookup the map data of a hash table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param id The ID of the data that is looked up in the GCS.
|
||||
/// \param lookup HashCallback that is called after lookup. If the callback is
|
||||
/// called with an empty hash table, then there was no data in the callback.
|
||||
/// \return Status
|
||||
virtual Status Lookup(const JobID &job_id, const ID &id,
|
||||
const HashCallback &lookup) = 0;
|
||||
|
||||
/// Subscribe to any Update or Remove operations to this hash table.
|
||||
///
|
||||
/// \param job_id The ID of the job.
|
||||
/// \param node_id The type of update to listen to. If this is nil, then a
|
||||
/// message for each Update to the table will be received. Else, only
|
||||
/// messages for the given node will be received. In the latter
|
||||
/// case, the node may request notifications on specific keys in the
|
||||
/// table via `RequestNotifications`.
|
||||
/// \param subscribe HashNotificationCallback that is called on each received message.
|
||||
/// \param done SubscriptionCallback that is called when subscription is complete and
|
||||
/// we are ready to receive messages.
|
||||
/// \return Status
|
||||
virtual Status Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) = 0;
|
||||
|
||||
virtual ~HashInterface(){};
|
||||
};
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class Hash : private Log<ID, Data>,
|
||||
public HashInterface<ID, Data>,
|
||||
virtual public PubsubInterface<ID> {
|
||||
public:
|
||||
using DataMap = std::unordered_map<std::string, std::shared_ptr<Data>>;
|
||||
using HashCallback = typename HashInterface<ID, Data>::HashCallback;
|
||||
using HashRemoveCallback = typename HashInterface<ID, Data>::HashRemoveCallback;
|
||||
using HashNotificationCallback =
|
||||
typename HashInterface<ID, Data>::HashNotificationCallback;
|
||||
using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;
|
||||
|
||||
Hash(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
|
||||
: Log<ID, Data>(contexts, client) {}
|
||||
|
||||
using Log<ID, Data>::RequestNotifications;
|
||||
using Log<ID, Data>::CancelNotifications;
|
||||
|
||||
Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
|
||||
const HashCallback &done) override;
|
||||
|
||||
Status Subscribe(const JobID &job_id, const NodeID &node_id,
|
||||
const HashNotificationCallback &subscribe,
|
||||
const SubscriptionCallback &done) override;
|
||||
|
||||
Status Lookup(const JobID &job_id, const ID &id, const HashCallback &lookup) override;
|
||||
|
||||
Status RemoveEntries(const JobID &job_id, const ID &id,
|
||||
const std::vector<std::string> &keys,
|
||||
const HashRemoveCallback &remove_callback) override;
|
||||
|
||||
/// Returns debug string for class.
|
||||
///
|
||||
/// \return string.
|
||||
std::string DebugString() const;
|
||||
|
||||
protected:
|
||||
using Log<ID, Data>::shard_contexts_;
|
||||
using Log<ID, Data>::client_;
|
||||
using Log<ID, Data>::pubsub_channel_;
|
||||
using Log<ID, Data>::prefix_;
|
||||
using Log<ID, Data>::subscribe_callback_index_;
|
||||
using Log<ID, Data>::GetRedisContext;
|
||||
|
||||
int64_t num_adds_ = 0;
|
||||
int64_t num_removes_ = 0;
|
||||
using Log<ID, Data>::num_lookups_;
|
||||
};
|
||||
|
||||
/// Hash table keyed by NodeID holding ResourceTableData entries. It is stored
/// under the NODE_RESOURCE prefix and publishes on NODE_RESOURCE_PUBSUB.
class DynamicResourceTable : public Hash<NodeID, ResourceTableData> {
 public:
  DynamicResourceTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                       RedisGcsClient *client)
      : Hash<NodeID, ResourceTableData>(contexts, client) {
    prefix_ = TablePrefix::NODE_RESOURCE;
    pubsub_channel_ = TablePubsub::NODE_RESOURCE_PUBSUB;
  }

  virtual ~DynamicResourceTable() {}
};
|
||||
|
||||
/// Set table keyed by ObjectID holding ObjectTableData entries. It is stored
/// under the OBJECT prefix and publishes on OBJECT_PUBSUB.
class ObjectTable : public Set<ObjectID, ObjectTableData> {
 public:
  ObjectTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
              RedisGcsClient *client)
      : Set<ObjectID, ObjectTableData>(contexts, client) {
    prefix_ = TablePrefix::OBJECT;
    pubsub_channel_ = TablePubsub::OBJECT_PUBSUB;
  }

  virtual ~ObjectTable() {}
};
|
||||
|
||||
/// Table keyed by NodeID holding HeartbeatTableData. It is stored under the
/// HEARTBEAT prefix and publishes on HEARTBEAT_PUBSUB.
class HeartbeatTable : public Table<NodeID, HeartbeatTableData> {
 public:
  HeartbeatTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                 RedisGcsClient *client)
      : Table<NodeID, HeartbeatTableData>(contexts, client) {
    prefix_ = TablePrefix::HEARTBEAT;
    pubsub_channel_ = TablePubsub::HEARTBEAT_PUBSUB;
  }

  virtual ~HeartbeatTable() {}
};
|
||||
|
||||
/// Table keyed by NodeID holding ResourceUsageBatchData. It is stored under
/// the RESOURCE_USAGE_BATCH prefix and publishes on
/// RESOURCE_USAGE_BATCH_PUBSUB.
class ResourceUsageBatchTable : public Table<NodeID, ResourceUsageBatchData> {
 public:
  ResourceUsageBatchTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                          RedisGcsClient *client)
      : Table<NodeID, ResourceUsageBatchData>(contexts, client) {
    prefix_ = TablePrefix::RESOURCE_USAGE_BATCH;
    pubsub_channel_ = TablePubsub::RESOURCE_USAGE_BATCH_PUBSUB;
  }

  virtual ~ResourceUsageBatchTable() {}
};
|
||||
|
||||
/// Log keyed by JobID holding JobTableData entries. It is stored under the JOB
/// prefix and publishes on JOB_PUBSUB.
class JobTable : public Log<JobID, JobTableData> {
 public:
  JobTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
           RedisGcsClient *client)
      : Log<JobID, JobTableData>(contexts, client) {
    prefix_ = TablePrefix::JOB;
    pubsub_channel_ = TablePubsub::JOB_PUBSUB;
  }

  virtual ~JobTable() {}
};
|
||||
|
||||
/// Log-based Actor table starts with an ALIVE entry, which represents the first time the
|
||||
/// actor is created. This may be followed by 0 or more pairs of RESTARTING, ALIVE
|
||||
/// entries, which represent each time the actor fails (RESTARTING) and gets recreated
|
||||
/// (ALIVE). These may be followed by a DEAD entry, which means that the actor has failed
|
||||
/// and will not be reconstructed.
|
||||
class LogBasedActorTable : public Log<ActorID, ActorTableData> {
|
||||
public:
|
||||
LogBasedActorTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
: Log(contexts, client) {
|
||||
pubsub_channel_ = TablePubsub::ACTOR_PUBSUB;
|
||||
prefix_ = TablePrefix::ACTOR;
|
||||
}
|
||||
|
||||
/// Get all actor id synchronously.
|
||||
std::vector<ActorID> GetAllActorID();
|
||||
|
||||
/// Get actor table data by actor id synchronously.
|
||||
Status Get(const ActorID &actor_id, ActorTableData *actor_table_data);
|
||||
};
|
||||
|
||||
/// Actor table.
|
||||
/// This table is only used for GCS-based actor management. And when completely migrate to
|
||||
/// GCS service, the log-based actor table could be removed.
|
||||
class ActorTable : public Table<ActorID, ActorTableData> {
|
||||
public:
|
||||
ActorTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
: Table(contexts, client) {
|
||||
pubsub_channel_ = TablePubsub::ACTOR_PUBSUB;
|
||||
prefix_ = TablePrefix::ACTOR;
|
||||
}
|
||||
|
||||
/// Get all actor id synchronously.
|
||||
std::vector<ActorID> GetAllActorID();
|
||||
|
||||
/// Get actor table data by actor id synchronously.
|
||||
Status Get(const ActorID &actor_id, ActorTableData *actor_table_data);
|
||||
};
|
||||
|
||||
/// Table keyed by WorkerID holding WorkerTableData. It is stored under the
/// WORKERS prefix and publishes on WORKER_FAILURE_PUBSUB.
class WorkerTable : public Table<WorkerID, WorkerTableData> {
 public:
  WorkerTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
              RedisGcsClient *client)
      : Table<WorkerID, WorkerTableData>(contexts, client) {
    prefix_ = TablePrefix::WORKERS;
    pubsub_channel_ = TablePubsub::WORKER_FAILURE_PUBSUB;
  }

  virtual ~WorkerTable() {}
};
|
||||
|
||||
/// Log keyed by TaskID holding TaskReconstructionData entries, stored under
/// the TASK_RECONSTRUCTION prefix. The constructor sets no pubsub channel.
class TaskReconstructionLog : public Log<TaskID, TaskReconstructionData> {
 public:
  TaskReconstructionLog(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                        RedisGcsClient *client)
      : Log<TaskID, TaskReconstructionData>(contexts, client) {
    prefix_ = TablePrefix::TASK_RECONSTRUCTION;
  }
};
|
||||
|
||||
/// Table keyed by TaskID holding TaskLeaseData. It is stored under the
/// TASK_LEASE prefix and publishes on TASK_LEASE_PUBSUB.
class TaskLeaseTable : public Table<TaskID, TaskLeaseData> {
 public:
  /// Use boost::optional to represent subscription results, so that we can
  /// notify raylet whether the entry of task lease is empty.
  using Callback =
      std::function<void(RedisGcsClient *client, const TaskID &task_id,
                         const std::vector<boost::optional<TaskLeaseData>> &data)>;

  TaskLeaseTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                 RedisGcsClient *client)
      : Table<TaskID, TaskLeaseData>(contexts, client) {
    prefix_ = TablePrefix::TASK_LEASE;
    pubsub_channel_ = TablePubsub::TASK_LEASE_PUBSUB;
  }

  /// Write a task lease, then ask Redis to expire the written key after the
  /// lease's timeout.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the task the lease belongs to.
  /// \param data The lease to write; its timeout() is used as the PEXPIRE
  /// argument.
  /// \param done Callback passed through to Table's Add.
  /// \return The failure of Table's Add if it fails, otherwise the status of
  /// issuing the PEXPIRE command.
  Status Add(const JobID &job_id, const TaskID &id,
             const std::shared_ptr<TaskLeaseData> &data,
             const WriteCallback &done) override {
    RAY_RETURN_NOT_OK((Table<TaskID, TaskLeaseData>::Add(job_id, id, data, done)));

    // Mark the entry for expiration in Redis. It's okay if this command fails
    // since the lease entry itself contains the expiration period. In the
    // worst case, if the command fails, then a client that looks up the lease
    // entry will overestimate the expiration time.
    // TODO(swang): Use a common helper function to format the key instead of
    // hardcoding it to match the Redis module.
    std::string lease_key = TablePrefix_Name(prefix_) + id.Binary();
    std::string lease_timeout = std::to_string(data->timeout());
    std::vector<std::string> args = {"PEXPIRE", lease_key, lease_timeout};

    return GetRedisContext(id)->RunArgvAsync(args);
  }

  /// Implement this method for the subscription tools class
  /// SubscriptionExecutor. In this way TaskLeaseTable() can also reuse class
  /// SubscriptionExecutor.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The node the subscription is made for.
  /// \param subscribe Callback that is called on each received message.
  /// \param done Callback that is called when subscription is complete.
  /// \return Status
  Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
                   const SubscriptionCallback &done);
};
|
||||
|
||||
namespace raylet {
|
||||
|
||||
class TaskTable : public Table<TaskID, TaskTableData> {
|
||||
public:
|
||||
TaskTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
: Table(contexts, client) {
|
||||
pubsub_channel_ = TablePubsub::RAYLET_TASK_PUBSUB;
|
||||
prefix_ = TablePrefix::RAYLET_TASK;
|
||||
}
|
||||
|
||||
TaskTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client, gcs::CommandType command_type)
|
||||
: TaskTable(contexts, client) {
|
||||
command_type_ = command_type;
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace raylet
|
||||
|
||||
/// Log keyed by UniqueID holding ProfileTableData entries, stored under the
/// PROFILE prefix. The constructor sets no pubsub channel.
class ProfileTable : public Log<UniqueID, ProfileTableData> {
 public:
  ProfileTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
               RedisGcsClient *client)
      : Log<UniqueID, ProfileTableData>(contexts, client) {
    prefix_ = TablePrefix::PROFILE;
  }

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;
};
|
||||
|
||||
/// \class NodeTable
|
||||
///
|
||||
/// The NodeTable stores information about active and inactive nodes. It is
|
||||
/// structured as a single log stored at a key known to all nodes. When a
|
||||
/// node connects, it appends an entry to the log indicating that it is
|
||||
/// alive. When a node disconnects, or if another node detects its failure,
|
||||
/// it should append an entry to the log indicating that it is dead. A node
|
||||
/// that is marked as dead should never again be marked as alive; if it needs
|
||||
/// to reconnect, it must connect with a different NodeID.
|
||||
class NodeTable : public Log<NodeID, GcsNodeInfo> {
|
||||
public:
|
||||
NodeTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
|
||||
RedisGcsClient *client)
|
||||
: Log(contexts, client) {
|
||||
pubsub_channel_ = TablePubsub::NODE_PUBSUB;
|
||||
prefix_ = TablePrefix::NODE;
|
||||
};
|
||||
|
||||
/// Connect as a NODE to the GCS. This registers us in the NODE table
|
||||
/// and begins subscription to NODE table notifications.
|
||||
///
|
||||
/// \param local_node_info Information about the connecting NODE. This must have the
|
||||
/// same id as the one set in the NODE table.
|
||||
/// \return Status
|
||||
ray::Status Connect(const GcsNodeInfo &local_node_info);
|
||||
|
||||
/// Disconnect the NODE from the GCS. The NODE ID assigned during
|
||||
/// registration should never be reused after disconnecting.
|
||||
///
|
||||
/// \return Status
|
||||
ray::Status Disconnect();
|
||||
|
||||
/// Mark a new node as connected to GCS asynchronously.
|
||||
///
|
||||
/// \param node_info Information about the node.
|
||||
/// \param done Callback that is called once the node has been marked to connected.
|
||||
/// \return Status
|
||||
ray::Status MarkConnected(const GcsNodeInfo &node_info, const WriteCallback &done);
|
||||
|
||||
/// Mark a different node as disconnected. The NODE ID should never be
|
||||
/// reused for a new node.
|
||||
///
|
||||
/// \param dead_node_id The ID of the node to mark as dead.
|
||||
/// \param done Callback that is called once the node has been marked to
|
||||
/// disconnected.
|
||||
/// \return Status
|
||||
ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done);
|
||||
|
||||
ray::Status SubscribeToNodeChange(
|
||||
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
|
||||
const StatusCallback &done);
|
||||
|
||||
/// Get a node's information from the cache. The cache only contains
|
||||
/// information for nodes that we've heard a notification for.
|
||||
///
|
||||
/// \param node The node to get information about.
|
||||
/// \param node_info The node information will be copied here if
|
||||
/// we have the node in the cache.
|
||||
/// a nil node ID.
|
||||
/// \return Whether the node is in the cache.
|
||||
bool GetNode(const NodeID &node, GcsNodeInfo *node_info) const;
|
||||
|
||||
/// Get the local node's ID.
|
||||
///
|
||||
/// \return The local node's ID.
|
||||
const NodeID &GetLocalNodeId() const;
|
||||
|
||||
/// Get the local node's information.
|
||||
///
|
||||
/// \return The local node's information.
|
||||
const GcsNodeInfo &GetLocalNode() const;
|
||||
|
||||
/// Check whether the given node is removed.
|
||||
///
|
||||
/// \param node_id The ID of the node to check.
|
||||
/// \return Whether the node with specified ID is removed.
|
||||
bool IsRemoved(const NodeID &node_id) const;
|
||||
|
||||
/// Get the information of all nodes.
|
||||
///
|
||||
/// \return The node ID to node information map.
|
||||
const std::unordered_map<NodeID, GcsNodeInfo> &GetAllNodes() const;
|
||||
|
||||
/// Lookup the node data in the node table.
|
||||
///
|
||||
/// \param lookup Callback that is called after lookup. If the callback is
|
||||
/// called with an empty vector, then there was no data at the key.
|
||||
/// \return Status.
|
||||
Status Lookup(const Callback &lookup);
|
||||
|
||||
/// Returns debug string for class.
|
||||
///
|
||||
/// \return string.
|
||||
std::string DebugString() const;
|
||||
|
||||
/// The key at which the log of node information is stored. This key must
|
||||
/// be kept the same across all instances of the NodeTable, so that all
|
||||
/// nodes append and read from the same key.
|
||||
NodeID node_log_key_;
|
||||
|
||||
private:
|
||||
using NodeChangeCallback =
|
||||
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
|
||||
|
||||
/// Register a callback to call when a new node is added or a node is removed.
|
||||
///
|
||||
/// \param callback The callback to register.
|
||||
void RegisterNodeChangeCallback(const NodeChangeCallback &callback);
|
||||
|
||||
/// Handle a node table notification.
|
||||
void HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info);
|
||||
|
||||
/// Whether this node has called Disconnect().
|
||||
bool disconnected_{false};
|
||||
/// This node's ID. It will be initialized when we call method `Connect(...)`.
|
||||
NodeID local_node_id_;
|
||||
/// Information about this node.
|
||||
GcsNodeInfo local_node_info_;
|
||||
/// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and
|
||||
/// RequestNotification.
|
||||
/// The reason for not using `local_node_id_` is because it is only initialized
|
||||
/// for registered nodes.
|
||||
NodeID subscribe_id_{NodeID::FromRandom()};
|
||||
/// The callback to call when a new node is added or a node is removed.
|
||||
NodeChangeCallback node_change_callback_{nullptr};
|
||||
/// A cache for information about all nodes.
|
||||
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
|
||||
/// The set of removed nodes.
|
||||
std::unordered_set<NodeID> removed_nodes_;
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,95 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
template <typename ID, typename Data>
|
||||
class AccessorTestBase : public ::testing::Test {
|
||||
public:
|
||||
AccessorTestBase() { TestSetupUtil::StartUpRedisServers(std::vector<int>()); }
|
||||
|
||||
virtual ~AccessorTestBase() { TestSetupUtil::ShutDownRedisServers(); }
|
||||
|
||||
virtual void SetUp() {
|
||||
GenTestData();
|
||||
|
||||
GcsClientOptions options =
|
||||
GcsClientOptions("127.0.0.1", TEST_REDIS_SERVER_PORTS.front(), "", true);
|
||||
gcs_client_.reset(new RedisGcsClient(options));
|
||||
RAY_CHECK_OK(gcs_client_->Connect(io_service_));
|
||||
|
||||
work_thread_.reset(new std::thread([this] {
|
||||
std::unique_ptr<boost::asio::io_service::work> work(
|
||||
new boost::asio::io_service::work(io_service_));
|
||||
io_service_.run();
|
||||
}));
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
gcs_client_->Disconnect();
|
||||
|
||||
io_service_.stop();
|
||||
work_thread_->join();
|
||||
work_thread_.reset();
|
||||
|
||||
gcs_client_.reset();
|
||||
|
||||
ClearTestData();
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void GenTestData() = 0;
|
||||
|
||||
void ClearTestData() { id_to_data_.clear(); }
|
||||
|
||||
void WaitPendingDone(std::chrono::milliseconds timeout) {
|
||||
WaitPendingDone(pending_count_, timeout);
|
||||
}
|
||||
|
||||
void WaitPendingDone(std::atomic<int> &pending_count,
|
||||
std::chrono::milliseconds timeout) {
|
||||
auto condition = [&pending_count]() { return pending_count == 0; };
|
||||
EXPECT_TRUE(WaitForCondition(condition, timeout.count()));
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<RedisGcsClient> gcs_client_;
|
||||
|
||||
boost::asio::io_service io_service_;
|
||||
std::unique_ptr<std::thread> work_thread_;
|
||||
|
||||
std::unordered_map<ID, std::shared_ptr<Data>> id_to_data_;
|
||||
|
||||
std::atomic<int> pending_count_{0};
|
||||
std::chrono::milliseconds wait_pending_timeout_{10000};
|
||||
};
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
@@ -1,82 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/test/accessor_test_base.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
/// Fixture for the actor info accessor tests; generates 100 ALIVE actors, one
/// per job.
class ActorInfoAccessorTest : public AccessorTestBase<ActorID, ActorTableData> {
 protected:
  /// Populate id_to_data_ with 100 actors. Actor `i` belongs to job `i`, is
  /// ALIVE, has max_restarts 1 and num_restarts 0.
  void GenTestData() override {
    for (size_t i = 0; i < 100; ++i) {
      std::shared_ptr<ActorTableData> actor = std::make_shared<ActorTableData>();
      actor->set_max_restarts(1);
      actor->set_num_restarts(0);
      JobID job_id = JobID::FromInt(i);
      actor->set_job_id(job_id.Binary());
      actor->set_state(ActorTableData::ALIVE);
      ActorID actor_id = ActorID::Of(job_id, RandomTaskId(), /*parent_task_counter=*/i);
      actor->set_actor_id(actor_id.Binary());
      id_to_data_[actor_id] = actor;
    }
  }

  // NOTE(review): not referenced by the test in this file; kept so the
  // fixture's members stay unchanged.
  size_t checkpoint_number_{2};
};
|
||||
|
||||
// Subscribes to all actor updates and waits until the subscription itself has
// been acknowledged.
TEST_F(ActorInfoAccessorTest, Subscribe) {
  ActorInfoAccessor &actor_accessor = gcs_client_->Actors();

  // Counters: one for received actor notifications, one for the completion of
  // the subscribe request itself.
  std::atomic<int> sub_pending_count{0};
  std::atomic<int> do_sub_pending_count{0};

  // Every notification must refer to an actor generated by the fixture.
  auto subscribe = [this, &sub_pending_count](const ActorID &actor_id,
                                              const ActorTableData & /*data*/) {
    const auto it = id_to_data_.find(actor_id);
    ASSERT_TRUE(it != id_to_data_.end());
    --sub_pending_count;
  };

  // Completion of the subscribe request.
  auto done = [&do_sub_pending_count](Status status) {
    RAY_CHECK_OK(status);
    --do_sub_pending_count;
  };

  ++do_sub_pending_count;
  RAY_CHECK_OK(actor_accessor.AsyncSubscribeAll(subscribe, done));

  // Wait until subscribe finishes.
  WaitPendingDone(do_sub_pending_count, wait_pending_timeout_);
}
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
RAY_CHECK(argc == 4);
|
||||
ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1];
|
||||
ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2];
|
||||
ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3];
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,99 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/pb_util.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/test/accessor_test_base.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
/// Fixture for the job info accessor tests; generates total_job_number_ jobs
/// that are not dead.
class RedisJobInfoAccessorTest : public AccessorTestBase<JobID, JobTableData> {
 protected:
  /// Populate id_to_data_ with one JobTableData per job index. Each job is
  /// created not dead, with timestamp 1, an empty driver IP address and the
  /// job index as driver pid.
  void GenTestData() override {
    for (size_t i = 0; i < total_job_number_; ++i) {
      JobID job_id = JobID::FromInt(i);
      std::shared_ptr<JobTableData> job_data_ptr =
          CreateJobTableData(job_id, /*is_dead*/ false, /*timestamp*/ 1,
                             /*driver_ip_address*/ "", /*driver_pid*/ i);
      id_to_data_[job_id] = job_data_ptr;
    }
  }

  /// Number of dead-job notifications the test is still waiting for.
  std::atomic<int> subscribe_pending_count_{0};
  /// Number of jobs generated by GenTestData.
  size_t total_job_number_{100};
};
|
||||
|
||||
// Subscribes to all job updates, adds every generated job and then marks each
// of them finished, waiting after every phase for the outstanding callbacks.
TEST_F(RedisJobInfoAccessorTest, AddAndSubscribe) {
  JobInfoAccessor &job_accessor = gcs_client_->Jobs();

  // SubscribeAll
  // Only notifications for dead jobs count towards the subscribe counter.
  auto on_subscribe = [this](const JobID &job_id, const JobTableData &data) {
    const auto it = id_to_data_.find(job_id);
    RAY_CHECK(it != id_to_data_.end());
    if (!data.is_dead()) {
      return;
    }
    --subscribe_pending_count_;
  };

  auto on_done = [this](Status status) {
    RAY_CHECK_OK(status);
    --pending_count_;
  };

  ++pending_count_;
  RAY_CHECK_OK(job_accessor.AsyncSubscribeAll(on_subscribe, on_done));

  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_);

  // Register
  for (const auto &entry : id_to_data_) {
    ++pending_count_;
    RAY_CHECK_OK(job_accessor.AsyncAdd(entry.second, [this](Status status) {
      RAY_CHECK_OK(status);
      --pending_count_;
    }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_);

  // Update
  // Each finished job is expected to produce one dead-job notification, so
  // both counters are raised before the request is issued.
  for (const auto &entry : id_to_data_) {
    ++pending_count_;
    ++subscribe_pending_count_;
    RAY_CHECK_OK(job_accessor.AsyncMarkFinished(entry.first, [this](Status status) {
      RAY_CHECK_OK(status);
      --pending_count_;
    }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_);
}
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
RAY_CHECK(argc == 4);
|
||||
ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1];
|
||||
ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2];
|
||||
ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3];
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@@ -1,181 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/test/accessor_test_base.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
// Fixture for the node resource accessor tests. It generates `node_number_`
// random node ids, each mapped to `resource_type_number_` resources, and
// remembers one resource name that the tests later delete.
class NodeDynamicResourceTest : public AccessorTestBase<NodeID, ResourceTableData> {
 protected:
  using ResourceMap = NodeResourceInfoAccessor::ResourceMap;

  // Fills `id_to_resource_map_`. Resources are named "0", "1", ... and the
  // capacity of each equals its index. The first name generated overall is the
  // only entry ever placed in `resource_to_delete_`.
  virtual void GenTestData() {
    for (size_t node = 0; node < node_number_; ++node) {
      const NodeID node_id = NodeID::FromRandom();
      ResourceMap resources;
      for (size_t index = 0; index < resource_type_number_; ++index) {
        auto entry = std::make_shared<ResourceTableData>();
        entry->set_resource_capacity(index);
        const std::string name = std::to_string(index);
        resources[name] = entry;
        if (resource_to_delete_.empty()) {
          resource_to_delete_.emplace_back(name);
        }
      }
      id_to_resource_map_[node_id] = std::move(resources);
    }
  }

  // Generated test data: node id -> resources of that node.
  std::unordered_map<NodeID, ResourceMap> id_to_resource_map_;

  // Size of the generated data set.
  size_t node_number_{100};
  size_t resource_type_number_{5};

  // Resource names removed by the Delete and Subscribe tests.
  std::vector<std::string> resource_to_delete_;

  // Counters for notifications the tests are still waiting for.
  std::atomic<int> sub_pending_count_{0};
  std::atomic<int> do_sub_pending_count_{0};
};
|
||||
|
||||
// Writes the resources of every generated node and, once each write has been
// acknowledged, reads them back and checks that the number of resources
// returned equals the number written.
TEST_F(NodeDynamicResourceTest, UpdateAndGet) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    const NodeID &id = node_rs.first;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second,
        [this, &node_resource_accessor, id](Status status) {
          RAY_CHECK_OK(status);
          auto get_callback = [this, id](Status status,
                                         const boost::optional<ResourceMap> &result) {
            // Decremented before the assertions: an ASSERT_* failure returns
            // from this callback immediately, and the request is done either way.
            --pending_count_;
            RAY_CHECK_OK(status);
            const auto it = id_to_resource_map_.find(id);
            // Guard the dereference of `it` below.
            ASSERT_TRUE(it != id_to_resource_map_.end());
            ASSERT_TRUE(result);
            ASSERT_EQ(it->second.size(), result->size());
          };
          // Get
          status = node_resource_accessor.AsyncGetResources(id, get_callback);
          RAY_CHECK_OK(status);
        });
    // A request that could not even be issued never runs its callback, so fail
    // here instead of waiting for the timeout.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
}
|
||||
|
||||
// Writes the resources of every generated node, deletes the names listed in
// `resource_to_delete_` from each node, then reads the resources back and
// checks that the count dropped by the number of deleted names.
TEST_F(NodeDynamicResourceTest, Delete) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    // Fail immediately if the request could not be issued.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);

  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    const NodeID &id = node_rs.first;
    // Delete
    Status status = node_resource_accessor.AsyncDeleteResources(
        id, resource_to_delete_, [this, &node_resource_accessor, id](Status status) {
          RAY_CHECK_OK(status);
          // Get
          status = node_resource_accessor.AsyncGetResources(
              id, [this, id](Status status, const boost::optional<ResourceMap> &result) {
                // Decremented before the assertions: an ASSERT_* failure
                // returns from this callback immediately.
                --pending_count_;
                RAY_CHECK_OK(status);
                const auto it = id_to_resource_map_.find(id);
                // Guard the dereference of `it` below.
                ASSERT_TRUE(it != id_to_resource_map_.end());
                ASSERT_TRUE(result);
                ASSERT_EQ(it->second.size() - resource_to_delete_.size(), result->size());
              });
          // The result of issuing the read was previously assigned but never
          // inspected.
          RAY_CHECK_OK(status);
        });
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
}
|
||||
|
||||
// Writes the resources of every generated node, subscribes to resource change
// notifications, then deletes `resource_to_delete_` from every node and waits
// for one notification per node.
TEST_F(NodeDynamicResourceTest, Subscribe) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    // Fail immediately if the request could not be issued.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);

  // Notification handler: the node must be one of the generated ones. A
  // notification with no deleted resources must carry every resource of the
  // node; otherwise it must carry exactly the deleted names.
  auto subscribe = [this](const rpc::NodeResourceChange &notification) {
    auto id = NodeID::FromBinary(notification.node_id());
    RAY_LOG(INFO) << "receive client id=" << id;
    auto it = id_to_resource_map_.find(id);
    ASSERT_TRUE(it != id_to_resource_map_.end());
    if (0 == notification.deleted_resources_size()) {
      // The casts keep both sides of the comparison unsigned.
      ASSERT_EQ(static_cast<size_t>(notification.updated_resources_size()),
                it->second.size());
    } else {
      ASSERT_EQ(static_cast<size_t>(notification.deleted_resources_size()),
                resource_to_delete_.size());
    }
    --sub_pending_count_;
  };

  auto done = [this](Status status) {
    RAY_CHECK_OK(status);
    --pending_count_;
  };

  // Subscribe
  ++pending_count_;
  Status status = node_resource_accessor.AsyncSubscribeToResources(subscribe, done);
  RAY_CHECK_OK(status);
  // Wait for the subscription to be acknowledged before triggering changes.
  // NOTE(review): presumably a delete published before the subscription is
  // active would not be delivered -- confirm against the accessor.
  WaitPendingDone(wait_pending_timeout_);

  for (const auto &node_rs : id_to_resource_map_) {
    // Delete
    ++pending_count_;
    ++sub_pending_count_;
    Status status = node_resource_accessor.AsyncDeleteResources(
        node_rs.first, resource_to_delete_, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    RAY_CHECK_OK(status);
  }

  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
}
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
RAY_CHECK(argc == 4);
|
||||
ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1];
|
||||
ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2];
|
||||
ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3];
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@@ -1,160 +0,0 @@
|
||||
// Copyright 2017 The Ray Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/test/accessor_test_base.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
namespace gcs {
|
||||
|
||||
// Fixture for the object location accessor test. It generates `object_count_`
// random object ids, each with `copy_count_` location records whose manager
// field holds a random node id.
class RedisObjectInfoAccessorTest : public AccessorTestBase<ObjectID, ObjectTableData> {
 protected:
  using ObjectVector = std::vector<std::shared_ptr<ObjectTableData>>;

  // Fills `object_id_to_data_` with the generated objects and their locations.
  void GenTestData() {
    for (size_t object_index = 0; object_index < object_count_; ++object_index) {
      ObjectVector locations;
      for (size_t copy_index = 0; copy_index < copy_count_; ++copy_index) {
        const NodeID holder = NodeID::FromRandom();
        auto record = std::make_shared<ObjectTableData>();
        record->set_manager(holder.Binary());
        locations.emplace_back(std::move(record));
      }
      const ObjectID object_id = ObjectID::FromRandom();
      object_id_to_data_[object_id] = locations;
    }
  }

  // Generated test data: object id -> location records of that object.
  std::unordered_map<ObjectID, ObjectVector> object_id_to_data_;

  // Size of the generated data set.
  size_t object_count_{100};
  size_t copy_count_{5};
};
|
||||
|
||||
// Exercises the object location accessor in two stages:
//   1. add every generated location and read the locations back;
//   2. subscribe to every object, remove one location per object, and read the
//      locations back again.
TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
  ObjectInfoAccessor &object_accessor = gcs_client_->Objects();
  // add && get
  // add
  for (const auto &elem : object_id_to_data_) {
    for (const auto &item : elem.second) {
      ++pending_count_;
      NodeID node_id = NodeID::FromBinary(item->manager());
      RAY_CHECK_OK(
          object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) {
            RAY_CHECK_OK(status);
            --pending_count_;
          }));
    }
  }
  WaitPendingDone(wait_pending_timeout_);
  // get
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    size_t total_size = elem.second.size();
    RAY_CHECK_OK(object_accessor.AsyncGetLocations(
        elem.first,
        [this, total_size](Status status,
                           const boost::optional<rpc::ObjectLocationInfo> &result) {
          RAY_CHECK_OK(status);
          // `result` is optional; make sure it holds a value before it is
          // dereferenced.
          ASSERT_TRUE(result);
          ASSERT_EQ(total_size, result->locations().size());
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);

  RAY_LOG(INFO) << "Case Add && Get done.";

  // subscribe && delete
  // subscribe
  std::atomic<int> sub_pending_count(0);
  // 1-based index of the notification being handled. It is a local of this test
  // (captured by reference) rather than a function-local static, so it starts
  // from 1 again if the test runs more than once in the same process.
  // NOTE(review): like the static it replaces, it is not synchronized; this
  // assumes notifications are delivered one at a time -- confirm.
  size_t response_count = 1;
  auto subscribe = [this, &sub_pending_count, &response_count](
                       const ObjectID &object_id,
                       const std::vector<rpc::ObjectLocationChange> &result) {
    const auto it = object_id_to_data_.find(object_id);
    ASSERT_TRUE(it != object_id_to_data_.end());
    // The first `object_count_` notifications are expected to carry all
    // `copy_count_` locations as additions; every later one is expected to
    // carry a single removal.
    size_t cur_count = response_count <= object_count_ ? copy_count_ : 1;
    ASSERT_EQ(result.size(), cur_count);
    bool change_mode = response_count <= object_count_;
    for (const auto &res : result) {
      ASSERT_EQ(change_mode, res.is_add());
    }
    ++response_count;
    --sub_pending_count;
  };
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    ++sub_pending_count;
    RAY_CHECK_OK(object_accessor.AsyncSubscribeToLocations(elem.first, subscribe,
                                                           [this](Status status) {
                                                             RAY_CHECK_OK(status);
                                                             --pending_count_;
                                                           }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count, wait_pending_timeout_);
  // delete
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    ++sub_pending_count;
    const ObjectVector &object_vec = elem.second;
    NodeID node_id = NodeID::FromBinary(object_vec[0]->manager());
    RAY_CHECK_OK(
        object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count, wait_pending_timeout_);
  // get
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    size_t total_size = elem.second.size();
    RAY_CHECK_OK(object_accessor.AsyncGetLocations(
        elem.first,
        [this, total_size](Status status,
                           const boost::optional<rpc::ObjectLocationInfo> &result) {
          RAY_CHECK_OK(status);
          ASSERT_TRUE(result);
          // One location per object was removed above.
          ASSERT_EQ(total_size - 1, result->locations().size());
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);

  RAY_LOG(INFO) << "Case Subscribe && Delete done.";
}
|
||||
|
||||
} // namespace gcs
|
||||
|
||||
} // namespace ray
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
InitShutdownRAII ray_log_shutdown_raii(ray::RayLog::StartRayLog,
|
||||
ray::RayLog::ShutDownRayLog, argv[0],
|
||||
ray::RayLogLevel::INFO,
|
||||
/*log_dir=*/"");
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
RAY_CHECK(argc == 4);
|
||||
ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1];
|
||||
ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2];
|
||||
ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3];
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/object_manager/format/object_manager_generated.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "absl/container/flat_hash_map.h"
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/object_manager/format/object_manager_generated.h"
|
||||
#include "ray/object_manager/object_directory.h"
|
||||
#include "ray/rpc/worker/core_worker_client.h"
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
|
||||
#include "ray/object_manager/object_manager.h"
|
||||
#include "ray/util/filesystem.h"
|
||||
#include "src/ray/protobuf/common.pb.h"
|
||||
@@ -32,10 +33,24 @@ namespace ray {
|
||||
|
||||
using rpc::GcsNodeInfo;
|
||||
|
||||
static inline void flushall_redis(void) {
|
||||
/// Resets the two local Redis instances used by the tests.
///
/// Flushes the instance on port 6379, records on it that there is a single
/// shard at 127.0.0.1:6380, then flushes the instance on port 6380.
///
/// \return true if both connections could be established; false as soon as one
/// of them fails. Replies to the individual commands are not inspected.
static inline bool flushall_redis(void) {
  redisContext *context = redisConnect("127.0.0.1", 6379);
  if (context == nullptr || context->err) {
    // A context that reports an error is still allocated and must be released;
    // this matters because the function is called repeatedly until it succeeds.
    if (context != nullptr) {
      redisFree(context);
    }
    return false;
  }
  freeReplyObject(redisCommand(context, "FLUSHALL"));
  freeReplyObject(redisCommand(context, "SET NumRedisShards 1"));
  freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380"));
  redisFree(context);

  redisContext *shard_context = redisConnect("127.0.0.1", 6380);
  if (shard_context == nullptr || shard_context->err) {
    if (shard_context != nullptr) {
      redisFree(shard_context);
    }
    return false;
  }
  freeReplyObject(redisCommand(shard_context, "FLUSHALL"));
  redisFree(shard_context);

  return true;
}
|
||||
|
||||
int64_t current_time_ms() {
|
||||
@@ -71,6 +86,7 @@ class MockServer {
|
||||
node_info.set_object_manager_port(object_manager_port);
|
||||
|
||||
ray::Status status = gcs_client_->Nodes().RegisterSelf(node_info, nullptr);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -85,7 +101,7 @@ class MockServer {
|
||||
class TestObjectManagerBase : public ::testing::Test {
|
||||
public:
|
||||
void SetUp() {
|
||||
flushall_redis();
|
||||
WaitForCondition(flushall_redis, 7000);
|
||||
|
||||
// start store
|
||||
socket_name_1 = TestSetupUtil::StartObjectStore();
|
||||
@@ -96,9 +112,10 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
int push_timeout_ms = 10000;
|
||||
|
||||
// start first server
|
||||
gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1");
|
||||
gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "",
|
||||
/*is_test_client=*/true);
|
||||
gcs_client_1 = std::make_shared<gcs::RedisGcsClient>(client_options);
|
||||
/*is_test_client=*/false);
|
||||
gcs_client_1 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
|
||||
RAY_CHECK_OK(gcs_client_1->Connect(main_service));
|
||||
ObjectManagerConfig om_config_1;
|
||||
om_config_1.store_socket_name = socket_name_1;
|
||||
@@ -110,7 +127,7 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
server1.reset(new MockServer(main_service, om_config_1, gcs_client_1));
|
||||
|
||||
// start second server
|
||||
gcs_client_2 = std::make_shared<gcs::RedisGcsClient>(client_options);
|
||||
gcs_client_2 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
|
||||
RAY_CHECK_OK(gcs_client_2->Connect(main_service));
|
||||
ObjectManagerConfig om_config_2;
|
||||
om_config_2.store_socket_name = socket_name_2;
|
||||
@@ -139,6 +156,10 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
|
||||
TestSetupUtil::StopObjectStore(socket_name_1);
|
||||
TestSetupUtil::StopObjectStore(socket_name_2);
|
||||
|
||||
if (!gcs_server_socket_name_.empty()) {
|
||||
TestSetupUtil::StopGcsServer(gcs_server_socket_name_);
|
||||
}
|
||||
}
|
||||
|
||||
ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) {
|
||||
@@ -172,6 +193,7 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
std::vector<ObjectID> v1;
|
||||
std::vector<ObjectID> v2;
|
||||
|
||||
std::string gcs_server_socket_name_;
|
||||
std::string socket_name_1;
|
||||
std::string socket_name_2;
|
||||
};
|
||||
@@ -421,5 +443,6 @@ TEST_F(StressTestObjectManager, StartStressTestObjectManager) {
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
ray::TEST_STORE_EXEC_PATH = std::string(argv[1]);
|
||||
ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[2]);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/status.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
|
||||
#include "ray/util/filesystem.h"
|
||||
#include "src/ray/protobuf/common.pb.h"
|
||||
|
||||
@@ -38,6 +39,8 @@ using rpc::GcsNodeInfo;
|
||||
/// Resets the local Redis instance on port 6379: flushes it and records that
/// there is a single shard at 127.0.0.1:6380.
///
/// If the connection cannot be established nothing is sent; the function has no
/// return value, so the failure is not reported to the caller.
static inline void flushall_redis(void) {
  redisContext *context = redisConnect("127.0.0.1", 6379);
  if (context == nullptr) {
    // No context was allocated; there is nothing to send commands on or free.
    return;
  }
  if (context->err) {
    // The connection failed, so skip the commands and just release the context.
    redisFree(context);
    return;
  }
  freeReplyObject(redisCommand(context, "FLUSHALL"));
  freeReplyObject(redisCommand(context, "SET NumRedisShards 1"));
  freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380"));
  redisFree(context);
}
|
||||
|
||||
@@ -91,9 +94,10 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
push_timeout_ms = 1500;
|
||||
|
||||
// start first server
|
||||
gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1");
|
||||
gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "",
|
||||
/*is_test_client=*/true);
|
||||
gcs_client_1 = std::make_shared<gcs::RedisGcsClient>(client_options);
|
||||
gcs_client_1 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
|
||||
RAY_CHECK_OK(gcs_client_1->Connect(main_service));
|
||||
ObjectManagerConfig om_config_1;
|
||||
om_config_1.store_socket_name = socket_name_1;
|
||||
@@ -105,7 +109,7 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
server1.reset(new MockServer(main_service, om_config_1, gcs_client_1));
|
||||
|
||||
// start second server
|
||||
gcs_client_2 = std::make_shared<gcs::RedisGcsClient>(client_options);
|
||||
gcs_client_2 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
|
||||
RAY_CHECK_OK(gcs_client_2->Connect(main_service));
|
||||
ObjectManagerConfig om_config_2;
|
||||
om_config_2.store_socket_name = socket_name_2;
|
||||
@@ -134,6 +138,10 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
|
||||
TestSetupUtil::StopObjectStore(socket_name_1);
|
||||
TestSetupUtil::StopObjectStore(socket_name_2);
|
||||
|
||||
if (!gcs_server_socket_name_.empty()) {
|
||||
TestSetupUtil::StopGcsServer(gcs_server_socket_name_);
|
||||
}
|
||||
}
|
||||
|
||||
ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) {
|
||||
@@ -171,6 +179,7 @@ class TestObjectManagerBase : public ::testing::Test {
|
||||
std::vector<ObjectID> v1;
|
||||
std::vector<ObjectID> v2;
|
||||
|
||||
std::string gcs_server_socket_name_;
|
||||
std::string socket_name_1;
|
||||
std::string socket_name_2;
|
||||
|
||||
@@ -482,5 +491,6 @@ int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
ray::TEST_STORE_EXEC_PATH = std::string(argv[1]);
|
||||
wait_timeout_ms = std::stoi(std::string(argv[2]));
|
||||
ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[3]);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@@ -306,7 +306,7 @@ ray::Status NodeManager::RegisterGcs() {
|
||||
// node failure. These workers can be identified by comparing the raylet_id
|
||||
// in their rpc::Address to the ID of a failed raylet.
|
||||
const auto &worker_failure_handler =
|
||||
[this](const WorkerID &id, const gcs::WorkerTableData &worker_failure_data) {
|
||||
[this](const WorkerID &id, const rpc::WorkerTableData &worker_failure_data) {
|
||||
HandleUnexpectedWorkerFailure(worker_failure_data.worker_address());
|
||||
};
|
||||
RAY_CHECK_OK(gcs_client_->Workers().AsyncSubscribeToWorkerFailures(
|
||||
@@ -1984,8 +1984,8 @@ void NodeManager::ProcessSetResourceRequest(
|
||||
RAY_CHECK_OK(gcs_client_->NodeResources().AsyncDeleteResources(
|
||||
node_id, {resource_name}, nullptr));
|
||||
} else {
|
||||
std::unordered_map<std::string, std::shared_ptr<gcs::ResourceTableData>> data_map;
|
||||
auto resource_table_data = std::make_shared<gcs::ResourceTableData>();
|
||||
std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>> data_map;
|
||||
auto resource_table_data = std::make_shared<rpc::ResourceTableData>();
|
||||
resource_table_data->set_resource_capacity(capacity);
|
||||
data_map.emplace(resource_name, resource_table_data);
|
||||
RAY_CHECK_OK(
|
||||
|
||||
@@ -139,10 +139,10 @@ ray::Status Raylet::RegisterGcs() {
|
||||
|
||||
// Add resource information.
|
||||
const NodeManagerConfig &node_manager_config = node_manager_.GetInitialConfig();
|
||||
std::unordered_map<std::string, std::shared_ptr<gcs::ResourceTableData>> resources;
|
||||
std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>> resources;
|
||||
for (const auto &resource_pair :
|
||||
node_manager_config.resource_config.GetResourceMap()) {
|
||||
auto resource = std::make_shared<gcs::ResourceTableData>();
|
||||
auto resource = std::make_shared<rpc::ResourceTableData>();
|
||||
resource->set_resource_capacity(resource_pair.second);
|
||||
resources.emplace(resource_pair.first, resource);
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
#include <unordered_set>
|
||||
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/gcs/tables.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/object_manager/object_directory.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -21,9 +21,11 @@
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/gcs/callback.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/gcs_client/service_based_accessor.h"
|
||||
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
|
||||
#include "ray/object_manager/object_directory.h"
|
||||
#include "ray/raylet/format/node_manager_generated.h"
|
||||
#include "ray/raylet/reconstruction_policy.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -97,17 +99,18 @@ class MockObjectDirectory : public ObjectDirectoryInterface {
|
||||
std::unordered_map<ObjectID, std::unordered_set<NodeID>> locations_;
|
||||
};
|
||||
|
||||
class MockNodeInfoAccessor : public gcs::RedisNodeInfoAccessor {
|
||||
class MockNodeInfoAccessor : public gcs::ServiceBasedNodeInfoAccessor {
|
||||
public:
|
||||
MockNodeInfoAccessor(gcs::RedisGcsClient *client)
|
||||
: gcs::RedisNodeInfoAccessor(client) {}
|
||||
MockNodeInfoAccessor(gcs::ServiceBasedGcsClient *client)
|
||||
: gcs::ServiceBasedNodeInfoAccessor(client) {}
|
||||
|
||||
bool IsRemoved(const NodeID &node_id) const override { return false; }
|
||||
};
|
||||
|
||||
class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor {
|
||||
class MockTaskInfoAccessor : public gcs::ServiceBasedTaskInfoAccessor {
|
||||
public:
|
||||
MockTaskInfoAccessor(gcs::RedisGcsClient *client) : RedisTaskInfoAccessor(client) {}
|
||||
MockTaskInfoAccessor(gcs::ServiceBasedGcsClient *client)
|
||||
: ServiceBasedTaskInfoAccessor(client) {}
|
||||
|
||||
Status AsyncSubscribeTaskLease(
|
||||
const TaskID &task_id,
|
||||
@@ -180,9 +183,9 @@ class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor {
|
||||
task_reconstruction_log_;
|
||||
};
|
||||
|
||||
class MockGcs : public gcs::RedisGcsClient {
|
||||
class MockGcs : public gcs::ServiceBasedGcsClient {
|
||||
public:
|
||||
MockGcs() : gcs::RedisGcsClient(gcs::GcsClientOptions("", 0, "")){};
|
||||
MockGcs() : gcs::ServiceBasedGcsClient(gcs::GcsClientOptions("", 0, "")){};
|
||||
|
||||
void Init(gcs::TaskInfoAccessor *task_accessor, gcs::NodeInfoAccessor *node_accessor) {
|
||||
task_accessor_.reset(task_accessor);
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
// clang-format off
|
||||
#include "ray/common/id.h"
|
||||
#include "ray/common/task/task.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/object_manager/object_manager.h"
|
||||
#include "ray/raylet/reconstruction_policy.h"
|
||||
// clang-format on
|
||||
|
||||
@@ -21,8 +21,6 @@
|
||||
#include "gtest/gtest.h"
|
||||
#include "ray/common/task/task_util.h"
|
||||
#include "ray/common/test_util.h"
|
||||
#include "ray/gcs/redis_accessor.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "ray/common/client_connection.h"
|
||||
#include "ray/common/task/task.h"
|
||||
#include "ray/common/task/task_common.h"
|
||||
#include "ray/gcs/redis_gcs_client.h"
|
||||
#include "ray/gcs/gcs_client.h"
|
||||
#include "ray/raylet/worker.h"
|
||||
|
||||
namespace ray {
|
||||
|
||||
@@ -25,18 +25,22 @@ fi
|
||||
REDIS_MODULE="./bazel-bin/libray_redis_module.so"
|
||||
LOAD_MODULE_ARGS=(--loadmodule "${REDIS_MODULE}")
|
||||
STORE_EXEC="./bazel-bin/plasma_store_server"
|
||||
GCS_SERVER_EXEC="./bazel-bin/gcs_server"
|
||||
|
||||
# Allow cleanup commands to fail.
|
||||
bazel run //:redis-cli -- -p 6379 shutdown || true
|
||||
bazel run //:redis-cli -- -p 6380 shutdown || true
|
||||
sleep 1s
|
||||
bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6379 &
|
||||
bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6380 &
|
||||
sleep 1s
|
||||
# Run tests.
|
||||
./bazel-bin/object_manager_stress_test $STORE_EXEC
|
||||
./bazel-bin/object_manager_stress_test $STORE_EXEC $GCS_SERVER_EXEC
|
||||
sleep 1s
|
||||
# Use timeout=1000ms for the Wait tests.
|
||||
./bazel-bin/object_manager_test $STORE_EXEC 1000
|
||||
./bazel-bin/object_manager_test $STORE_EXEC 1000 $GCS_SERVER_EXEC
|
||||
bazel run //:redis-cli -- -p 6379 shutdown
|
||||
bazel run //:redis-cli -- -p 6380 shutdown
|
||||
sleep 1s
|
||||
|
||||
# Include raylet integration test once it's ready.
|
||||
|
||||
Reference in New Issue
Block a user