diff --git a/BUILD.bazel b/BUILD.bazel index 16b9a315f..8782dbdf8 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -1020,7 +1020,6 @@ cc_test( cc_library( name = "gcs_test_util_lib", hdrs = [ - "src/ray/gcs/test/accessor_test_base.h", "src/ray/gcs/test/gcs_test_util.h", ], copts = COPTS, @@ -1621,111 +1620,6 @@ cc_library( ], ) -# TODO(micafan) Support test group in future. Use test group we can run all gcs test once. -cc_test( - name = "redis_gcs_client_test", - srcs = ["src/ray/gcs/test/redis_gcs_client_test.cc"], - args = [ - "$(location redis-server)", - "$(location redis-cli)", - "$(location libray_redis_module.so)", - ], - copts = COPTS, - data = [ - "//:libray_redis_module.so", - "//:redis-cli", - "//:redis-server", - ], - deps = [ - ":gcs", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "redis_actor_info_accessor_test", - srcs = ["src/ray/gcs/test/redis_actor_info_accessor_test.cc"], - args = [ - "$(location redis-server)", - "$(location redis-cli)", - "$(location libray_redis_module.so)", - ], - copts = COPTS, - data = [ - "//:libray_redis_module.so", - "//:redis-cli", - "//:redis-server", - ], - deps = [ - ":gcs", - ":gcs_test_util_lib", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "redis_object_info_accessor_test", - srcs = ["src/ray/gcs/test/redis_object_info_accessor_test.cc"], - args = [ - "$(location redis-server)", - "$(location redis-cli)", - "$(location libray_redis_module.so)", - ], - copts = COPTS, - data = [ - "//:libray_redis_module.so", - "//:redis-cli", - "//:redis-server", - ], - deps = [ - ":gcs", - ":gcs_test_util_lib", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "redis_job_info_accessor_test", - srcs = ["src/ray/gcs/test/redis_job_info_accessor_test.cc"], - args = [ - "$(location redis-server)", - "$(location redis-cli)", - "$(location libray_redis_module.so)", - ], - copts = COPTS, - data = [ - "//:libray_redis_module.so", - "//:redis-cli", - "//:redis-server", - ], - deps = [ - ":gcs", - ":gcs_test_util_lib", - "@com_google_googletest//:gtest_main", - ], -) - -cc_test( - name = "redis_node_info_accessor_test", - srcs = ["src/ray/gcs/test/redis_node_info_accessor_test.cc"], - args = [ - "$(location redis-server)", - "$(location redis-cli)", - "$(location libray_redis_module.so)", - ], - copts = COPTS, - data = [ - "//:libray_redis_module.so", - "//:redis-cli", - "//:redis-server", - ], - deps = [ - ":gcs", - ":gcs_test_util_lib", - "@com_google_googletest//:gtest_main", - ], -) - cc_test( name = "asio_test", srcs = ["src/ray/gcs/test/asio_test.cc"], diff --git a/ci/travis/ci.sh b/ci/travis/ci.sh index e4d9741cd..9a8c0ecbf 100755 --- a/ci/travis/ci.sh +++ b/ci/travis/ci.sh @@ -120,7 +120,6 @@ test_core() { case "${OSTYPE}" in msys) args+=( - -//:redis_gcs_client_test -//:core_worker_test -//:event_test -//:gcs_pub_sub_test diff --git a/src/ray/core_worker/actor_handle.cc b/src/ray/core_worker/actor_handle.cc index 73e56df54..5448b7057 100644 --- a/src/ray/core_worker/actor_handle.cc +++ b/src/ray/core_worker/actor_handle.cc @@ -45,7 +45,7 @@ ray::rpc::ActorHandle CreateInnerActorHandleFromString(const std::string &serial } ray::rpc::ActorHandle CreateInnerActorHandleFromActorTableData( - const ray::gcs::ActorTableData &actor_table_data) { + const ray::rpc::ActorTableData &actor_table_data) { ray::rpc::ActorHandle inner; inner.set_actor_id(actor_table_data.actor_id()); inner.set_owner_id(actor_table_data.parent_id()); @@ -80,7 +80,7 @@ ActorHandle::ActorHandle( ActorHandle::ActorHandle(const std::string &serialized) : ActorHandle(CreateInnerActorHandleFromString(serialized)) {} -ActorHandle::ActorHandle(const gcs::ActorTableData &actor_table_data) +ActorHandle::ActorHandle(const rpc::ActorTableData &actor_table_data) : ActorHandle(CreateInnerActorHandleFromActorTableData(actor_table_data)) {} void ActorHandle::SetActorTaskSpec(TaskSpecBuilder &builder, const ObjectID new_cursor) { diff --git a/src/ray/core_worker/actor_handle.h b/src/ray/core_worker/actor_handle.h index 12b47cb53..e23929303 100644 --- a/src/ray/core_worker/actor_handle.h +++ b/src/ray/core_worker/actor_handle.h @@ -20,7 +20,6 @@ #include "ray/common/task/task_util.h" #include "ray/core_worker/common.h" #include "ray/core_worker/context.h" -#include "ray/gcs/redis_gcs_client.h" #include "src/ray/protobuf/core_worker.pb.h" #include "src/ray/protobuf/gcs.pb.h" @@ -42,7 +41,7 @@ class ActorHandle { ActorHandle(const std::string &serialized); /// Constructs an ActorHandle from a gcs::ActorTableData message. - ActorHandle(const gcs::ActorTableData &actor_table_data); + ActorHandle(const rpc::ActorTableData &actor_table_data); ActorID GetActorID() const { return ActorID::FromBinary(inner_.actor_id()); }; diff --git a/src/ray/core_worker/actor_manager.cc b/src/ray/core_worker/actor_manager.cc index 6b931082a..73ca9ec34 100644 --- a/src/ray/core_worker/actor_manager.cc +++ b/src/ray/core_worker/actor_manager.cc @@ -15,7 +15,6 @@ #include "ray/core_worker/actor_manager.h" #include "ray/gcs/pb_util.h" -#include "ray/gcs/redis_accessor.h" namespace ray { @@ -124,8 +123,8 @@ void ActorManager::WaitForActorOutOfScope( } void ActorManager::HandleActorStateNotification(const ActorID &actor_id, - const gcs::ActorTableData &actor_data) { - const auto &actor_state = gcs::ActorTableData::ActorState_Name(actor_data.state()); + const rpc::ActorTableData &actor_data) { + const auto &actor_state = rpc::ActorTableData::ActorState_Name(actor_data.state()); RAY_LOG(INFO) << "received notification on actor, state: " << actor_state << ", actor_id: " << actor_id << ", ip address: " << actor_data.address().ip_address() @@ -133,14 +132,14 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id, << WorkerID::FromBinary(actor_data.address().worker_id()) << ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id()) << ", num_restarts: " << actor_data.num_restarts(); - if (actor_data.state() == gcs::ActorTableData::RESTARTING) { + if (actor_data.state() == rpc::ActorTableData::RESTARTING) { direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false); - } else if (actor_data.state() == gcs::ActorTableData::DEAD) { + } else if (actor_data.state() == rpc::ActorTableData::DEAD) { direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), true); // We cannot erase the actor handle here because clients can still // submit tasks to dead actors. This also means we defer unsubscription, // otherwise we crash when bulk unsubscribing all actor handles. - } else if (actor_data.state() == gcs::ActorTableData::ALIVE) { + } else if (actor_data.state() == rpc::ActorTableData::ALIVE) { direct_actor_submitter_->ConnectActor(actor_id, actor_data.address(), actor_data.num_restarts()); } else { diff --git a/src/ray/core_worker/actor_manager.h b/src/ray/core_worker/actor_manager.h index e3c72913a..ff47b7403 100644 --- a/src/ray/core_worker/actor_manager.h +++ b/src/ray/core_worker/actor_manager.h @@ -18,7 +18,7 @@ #include "ray/core_worker/actor_handle.h" #include "ray/core_worker/reference_count.h" #include "ray/core_worker/transport/direct_actor_transport.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client.h" namespace ray { @@ -177,7 +177,7 @@ class ActorManager { /// \param[in] actor_id The actor id of this notification. /// \param[in] actor_data The GCS actor data. void HandleActorStateNotification(const ActorID &actor_id, - const gcs::ActorTableData &actor_data); + const rpc::ActorTableData &actor_data); /// GCS client. std::shared_ptr gcs_client_; diff --git a/src/ray/core_worker/core_worker.cc b/src/ray/core_worker/core_worker.cc index d2ab2c150..219b5e062 100644 --- a/src/ray/core_worker/core_worker.cc +++ b/src/ray/core_worker/core_worker.cc @@ -464,7 +464,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_ TaskID::ComputeDriverTaskId(worker_context_.GetWorkerID()), GetCallerId(), rpc_address_); - std::shared_ptr data = std::make_shared(); + std::shared_ptr data = std::make_shared(); data->mutable_task()->mutable_task_spec()->CopyFrom(builder.Build().GetMessage()); if (!options_.is_local_mode) { RAY_CHECK_OK(gcs_client_->Tasks().AsyncAdd(data, nullptr)); @@ -1639,7 +1639,7 @@ std::pair CoreWorker::GetNamedActorHandle( std::make_shared>(std::promise()); RAY_CHECK_OK(gcs_client_->Actors().AsyncGetByName( name, [this, &actor_id, name, ready_promise]( - Status status, const boost::optional &result) { + Status status, const boost::optional &result) { if (status.ok() && result) { auto actor_handle = std::unique_ptr(new ActorHandle(*result)); actor_id = actor_handle->GetActorID(); diff --git a/src/ray/core_worker/core_worker.h b/src/ray/core_worker/core_worker.h index 14136a895..256f0c42d 100644 --- a/src/ray/core_worker/core_worker.h +++ b/src/ray/core_worker/core_worker.h @@ -30,8 +30,7 @@ #include "ray/core_worker/store_provider/plasma_store_provider.h" #include "ray/core_worker/transport/direct_actor_transport.h" #include "ray/core_worker/transport/direct_task_transport.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/gcs/subscription_executor.h" +#include "ray/gcs/gcs_client.h" #include "ray/raylet_client/raylet_client.h" #include "ray/rpc/node_manager/node_manager_client.h" #include "ray/rpc/worker/core_worker_client.h" diff --git a/src/ray/core_worker/profiling.h b/src/ray/core_worker/profiling.h index 908fb77a3..24c15a29c 100644 --- a/src/ray/core_worker/profiling.h +++ b/src/ray/core_worker/profiling.h @@ -18,7 +18,7 @@ #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "ray/core_worker/context.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client.h" namespace ray { diff --git a/src/ray/core_worker/test/actor_manager_test.cc b/src/ray/core_worker/test/actor_manager_test.cc index 06cb9a70e..cd4a21408 100644 --- a/src/ray/core_worker/test/actor_manager_test.cc +++ b/src/ray/core_worker/test/actor_manager_test.cc @@ -20,17 +20,17 @@ #include "ray/common/test_util.h" #include "ray/core_worker/reference_count.h" #include "ray/core_worker/transport/direct_actor_transport.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client/service_based_accessor.h" +#include "ray/gcs/gcs_client/service_based_gcs_client.h" namespace ray { using ::testing::_; -class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor { +class MockActorInfoAccessor : public gcs::ServiceBasedActorInfoAccessor { public: - MockActorInfoAccessor(gcs::RedisGcsClient *client) - : gcs::RedisActorInfoAccessor(client) {} + MockActorInfoAccessor(gcs::ServiceBasedGcsClient *client) + : gcs::ServiceBasedActorInfoAccessor(client) {} ~MockActorInfoAccessor() {} @@ -44,7 +44,7 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor { } bool ActorStateNotificationPublished(const ActorID &actor_id, - const gcs::ActorTableData &actor_data) { + const rpc::ActorTableData &actor_data) { auto it = callback_map_.find(actor_id); if (it == callback_map_.end()) return false; auto actor_state_notification_callback = it->second; @@ -60,15 +60,13 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor { callback_map_; }; -class MockGcsClient : public gcs::RedisGcsClient { +class MockGcsClient : public gcs::ServiceBasedGcsClient { public: - MockGcsClient(const gcs::GcsClientOptions &options) : gcs::RedisGcsClient(options) {} + MockGcsClient(gcs::GcsClientOptions options) : gcs::ServiceBasedGcsClient(options) {} - void Init(MockActorInfoAccessor *actor_accesor_mock) { - actor_accessor_.reset(actor_accesor_mock); + void Init(MockActorInfoAccessor *actor_info_accessor) { + actor_accessor_.reset(actor_info_accessor); } - - ~MockGcsClient() {} }; class MockDirectActorSubmitter : public CoreWorkerDirectActorTaskSubmitterInterface { diff --git a/src/ray/core_worker/test/direct_actor_transport_test.cc b/src/ray/core_worker/test/direct_actor_transport_test.cc index dffb8c4b5..8c196163e 100644 --- a/src/ray/core_worker/test/direct_actor_transport_test.cc +++ b/src/ray/core_worker/test/direct_actor_transport_test.cc @@ -223,7 +223,6 @@ TEST_F(DirectActorSubmitterTest, TestActorDead) { addr.set_worker_id(worker_id.Binary()); ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0); submitter_.AddActorQueueIfNotExists(actor_id); - gcs::ActorTableData actor_data; submitter_.ConnectActor(actor_id, addr, 0); ASSERT_EQ(worker_client_->callbacks.size(), 0); @@ -256,7 +255,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartNoRetry) { addr.set_worker_id(worker_id.Binary()); ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0); submitter_.AddActorQueueIfNotExists(actor_id); - gcs::ActorTableData actor_data; addr.set_port(0); submitter_.ConnectActor(actor_id, addr, 0); ASSERT_EQ(worker_client_->callbacks.size(), 0); @@ -299,7 +297,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartRetry) { addr.set_worker_id(worker_id.Binary()); ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0); submitter_.AddActorQueueIfNotExists(actor_id); - gcs::ActorTableData actor_data; addr.set_port(0); submitter_.ConnectActor(actor_id, addr, 0); ASSERT_EQ(worker_client_->callbacks.size(), 0); @@ -351,7 +348,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderRetry) { addr.set_worker_id(worker_id.Binary()); ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0); submitter_.AddActorQueueIfNotExists(actor_id); - gcs::ActorTableData actor_data; addr.set_port(0); submitter_.ConnectActor(actor_id, addr, 0); ASSERT_EQ(worker_client_->callbacks.size(), 0); @@ -401,7 +397,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderGcs) { addr.set_worker_id(worker_id.Binary()); ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0); submitter_.AddActorQueueIfNotExists(actor_id); - gcs::ActorTableData actor_data; addr.set_port(0); submitter_.ConnectActor(actor_id, addr, 0); ASSERT_EQ(worker_client_->callbacks.size(), 0); diff --git a/src/ray/core_worker/transport/direct_actor_transport.h b/src/ray/core_worker/transport/direct_actor_transport.h index cb7637c9f..ab28dc85a 100644 --- a/src/ray/core_worker/transport/direct_actor_transport.h +++ b/src/ray/core_worker/transport/direct_actor_transport.h @@ -32,7 +32,6 @@ #include "ray/core_worker/store_provider/memory_store/memory_store.h" #include "ray/core_worker/task_manager.h" #include "ray/core_worker/transport/dependency_resolver.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/grpc_server.h" #include "ray/rpc/worker/core_worker_client.h" diff --git a/src/ray/gcs/gcs_client/global_state_accessor.cc b/src/ray/gcs/gcs_client/global_state_accessor.cc index 8d188ba07..5791515bc 100644 --- a/src/ray/gcs/gcs_client/global_state_accessor.cc +++ b/src/ray/gcs/gcs_client/global_state_accessor.cc @@ -225,7 +225,7 @@ std::vector GlobalStateAccessor::GetAllWorkerInfo() { } bool GlobalStateAccessor::AddWorkerInfo(const std::string &serialized_string) { - auto data_ptr = std::make_shared(); + auto data_ptr = std::make_shared(); data_ptr->ParseFromString(serialized_string); std::promise promise; RAY_CHECK_OK( diff --git a/src/ray/gcs/gcs_client/service_based_accessor.cc b/src/ray/gcs/gcs_client/service_based_accessor.cc index 7e7d67d44..0e610a68e 100644 --- a/src/ray/gcs/gcs_client/service_based_accessor.cc +++ b/src/ray/gcs/gcs_client/service_based_accessor.cc @@ -19,6 +19,8 @@ namespace ray { namespace gcs { +using namespace ray::rpc; + ServiceBasedJobInfoAccessor::ServiceBasedJobInfoAccessor( ServiceBasedGcsClient *client_impl) : client_impl_(client_impl) {} diff --git a/src/ray/gcs/gcs_client/service_based_accessor.h b/src/ray/gcs/gcs_client/service_based_accessor.h index 05f2d4316..167814bb2 100644 --- a/src/ray/gcs/gcs_client/service_based_accessor.h +++ b/src/ray/gcs/gcs_client/service_based_accessor.h @@ -16,7 +16,6 @@ #include "ray/common/task/task_spec.h" #include "ray/gcs/accessor.h" -#include "ray/gcs/subscription_executor.h" #include "ray/util/sequencer.h" #include "src/ray/protobuf/gcs_service.pb.h" @@ -38,12 +37,12 @@ class ServiceBasedJobInfoAccessor : public JobInfoAccessor { virtual ~ServiceBasedJobInfoAccessor() = default; - Status AsyncAdd(const std::shared_ptr &data_ptr, + Status AsyncAdd(const std::shared_ptr &data_ptr, const StatusCallback &callback) override; Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override; - Status AsyncSubscribeAll(const SubscribeCallback &subscribe, + Status AsyncSubscribeAll(const SubscribeCallback &subscribe, const StatusCallback &done) override; Status AsyncGetAll(const MultiItemCallback &callback) override; @@ -71,7 +70,7 @@ class ServiceBasedActorInfoAccessor : public ActorInfoAccessor { virtual ~ServiceBasedActorInfoAccessor() = default; - Status GetAll(std::vector *actor_table_data_list) override; + Status GetAll(std::vector *actor_table_data_list) override; Status AsyncGet(const ActorID &actor_id, const OptionalItemCallback &callback) override; @@ -136,30 +135,30 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor { virtual ~ServiceBasedNodeInfoAccessor() = default; - Status RegisterSelf(const GcsNodeInfo &local_node_info, + Status RegisterSelf(const rpc::GcsNodeInfo &local_node_info, const StatusCallback &callback) override; Status UnregisterSelf() override; const NodeID &GetSelfId() const override; - const GcsNodeInfo &GetSelfInfo() const override; + const rpc::GcsNodeInfo &GetSelfInfo() const override; Status AsyncRegister(const rpc::GcsNodeInfo &node_info, const StatusCallback &callback) override; Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override; - Status AsyncGetAll(const MultiItemCallback &callback) override; + Status AsyncGetAll(const MultiItemCallback &callback) override; Status AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, + const SubscribeCallback &subscribe, const StatusCallback &done) override; - boost::optional Get(const NodeID &node_id, - bool filter_dead_nodes = false) const override; + boost::optional Get(const NodeID &node_id, + bool filter_dead_nodes = false) const override; - const std::unordered_map &GetAll() const override; + const std::unordered_map &GetAll() const override; bool IsRemoved(const NodeID &node_id) const override; @@ -207,21 +206,21 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor { /// from a failure. rpc::ReportResourceUsageRequest cached_resource_usage_ GUARDED_BY(mutex_); - void HandleNotification(const GcsNodeInfo &node_info); + void HandleNotification(const rpc::GcsNodeInfo &node_info); ServiceBasedGcsClient *client_impl_; using NodeChangeCallback = - std::function; + std::function; - GcsNodeInfo local_node_info_; + rpc::GcsNodeInfo local_node_info_; NodeID local_node_id_; /// The callback to call when a new node is added or a node is removed. NodeChangeCallback node_change_callback_{nullptr}; /// A cache for information about all nodes. - std::unordered_map node_cache_; + std::unordered_map node_cache_; /// The set of removed nodes. std::unordered_set removed_nodes_; }; diff --git a/src/ray/gcs/gcs_client/service_based_gcs_client.cc b/src/ray/gcs/gcs_client/service_based_gcs_client.cc index f643496b8..900d3e50d 100644 --- a/src/ray/gcs/gcs_client/service_based_gcs_client.cc +++ b/src/ray/gcs/gcs_client/service_based_gcs_client.cc @@ -37,21 +37,23 @@ Status ServiceBasedGcsClient::Connect(boost::asio::io_service &io_service) { return Status::Invalid("gcs service address is invalid!"); } - // Connect to gcs. - redis_gcs_client_.reset(new RedisGcsClient(options_)); - RAY_CHECK_OK(redis_gcs_client_->Connect(io_service)); + // Connect to redis. + RedisClientOptions redis_client_options(options_.server_ip_, options_.server_port_, + options_.password_, options_.is_test_client_); + redis_client_.reset(new RedisClient(redis_client_options)); + RAY_CHECK_OK(redis_client_->Connect(io_service)); // Init gcs pub sub instance. - gcs_pub_sub_.reset(new GcsPubSub(redis_gcs_client_->GetRedisClient())); + gcs_pub_sub_.reset(new GcsPubSub(redis_client_)); // Get gcs service address. get_server_address_func_ = [this](std::pair *address) { return GetGcsServerAddressFromRedis( - redis_gcs_client_->primary_context()->sync_context(), address); + redis_client_->GetPrimaryContext()->sync_context(), address); }; std::pair address; RAY_CHECK(GetGcsServerAddressFromRedis( - redis_gcs_client_->primary_context()->sync_context(), &address, + redis_client_->GetPrimaryContext()->sync_context(), &address, RayConfig::instance().gcs_service_connect_retries())) << "Failed to get gcs server address when init gcs client."; @@ -96,8 +98,8 @@ void ServiceBasedGcsClient::Disconnect() { is_connected_ = false; detect_timer_->cancel(); gcs_pub_sub_.reset(); - redis_gcs_client_->Disconnect(); - redis_gcs_client_.reset(); + redis_client_->Disconnect(); + redis_client_.reset(); RAY_LOG(DEBUG) << "ServiceBasedGcsClient Disconnected."; } diff --git a/src/ray/gcs/gcs_client/service_based_gcs_client.h b/src/ray/gcs/gcs_client/service_based_gcs_client.h index 906165099..9b0e79806 100644 --- a/src/ray/gcs/gcs_client/service_based_gcs_client.h +++ b/src/ray/gcs/gcs_client/service_based_gcs_client.h @@ -14,8 +14,9 @@ #pragma once +#include "ray/gcs/gcs_client.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/redis_client.h" #include "ray/rpc/gcs_server/gcs_rpc_client.h" namespace ray { @@ -31,8 +32,6 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient { GcsPubSub &GetGcsPubSub() { return *gcs_pub_sub_; } - RedisGcsClient &GetRedisGcsClient() { return *redis_gcs_client_; } - rpc::GcsRpcClient &GetGcsRpcClient() { return *gcs_rpc_client_; } private: @@ -59,7 +58,7 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient { /// Reconnect to GCS RPC server. void ReconnectGcsServer(); - std::unique_ptr redis_gcs_client_; + std::shared_ptr redis_client_; std::unique_ptr gcs_pub_sub_; diff --git a/src/ray/gcs/gcs_server/gcs_actor_manager.h b/src/ray/gcs/gcs_server/gcs_actor_manager.h index c2f23ac2d..e10be2fe8 100644 --- a/src/ray/gcs/gcs_server/gcs_actor_manager.h +++ b/src/ray/gcs/gcs_server/gcs_actor_manager.h @@ -24,7 +24,6 @@ #include "ray/gcs/gcs_server/gcs_init_data.h" #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" #include "ray/rpc/worker/core_worker_client.h" #include "src/ray/protobuf/gcs_service.pb.h" diff --git a/src/ray/gcs/gcs_server/gcs_job_manager.h b/src/ray/gcs/gcs_server/gcs_job_manager.h index 24d8f7dfe..da8628967 100644 --- a/src/ray/gcs/gcs_server/gcs_job_manager.h +++ b/src/ray/gcs/gcs_server/gcs_job_manager.h @@ -17,7 +17,6 @@ #include "ray/gcs/gcs_server/gcs_object_manager.h" #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" namespace ray { diff --git a/src/ray/gcs/gcs_server/gcs_object_manager.h b/src/ray/gcs/gcs_server/gcs_object_manager.h index 4d728e8e0..bd21bfd1b 100644 --- a/src/ray/gcs/gcs_server/gcs_object_manager.h +++ b/src/ray/gcs/gcs_server/gcs_object_manager.h @@ -18,7 +18,6 @@ #include "ray/gcs/gcs_server/gcs_node_manager.h" #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" namespace ray { diff --git a/src/ray/gcs/gcs_server/gcs_server.cc b/src/ray/gcs/gcs_server/gcs_server.cc index 71e2a6d81..672c593df 100644 --- a/src/ray/gcs/gcs_server/gcs_server.cc +++ b/src/ray/gcs/gcs_server/gcs_server.cc @@ -43,23 +43,22 @@ GcsServer::~GcsServer() { Stop(); } void GcsServer::Start() { // Init backend client. - GcsClientOptions options(config_.redis_address, config_.redis_port, - config_.redis_password, config_.is_test); - redis_gcs_client_ = std::make_shared(options); - auto status = redis_gcs_client_->Connect(main_service_); + RedisClientOptions redis_client_options(config_.redis_address, config_.redis_port, + config_.redis_password, config_.is_test); + redis_client_ = std::make_shared(redis_client_options); + auto status = redis_client_->Connect(main_service_); RAY_CHECK(status.ok()) << "Failed to init redis gcs client as " << status; // Init redis failure detector. gcs_redis_failure_detector_ = std::make_shared( - main_service_, redis_gcs_client_->primary_context(), [this]() { Stop(); }); + main_service_, redis_client_->GetPrimaryContext(), [this]() { Stop(); }); gcs_redis_failure_detector_->Start(); // Init gcs pub sub instance. - gcs_pub_sub_ = std::make_shared(redis_gcs_client_->GetRedisClient()); + gcs_pub_sub_ = std::make_shared(redis_client_); // Init gcs table storage. - gcs_table_storage_ = - std::make_shared(redis_gcs_client_->GetRedisClient()); + gcs_table_storage_ = std::make_shared(redis_client_); // Load gcs tables data asynchronously. auto gcs_init_data = std::make_shared(gcs_table_storage_); @@ -132,7 +131,7 @@ void GcsServer::Stop() { } void GcsServer::InitGcsNodeManager(const GcsInitData &gcs_init_data) { - RAY_CHECK(redis_gcs_client_ && gcs_table_storage_ && gcs_pub_sub_); + RAY_CHECK(redis_client_ && gcs_table_storage_ && gcs_pub_sub_); gcs_node_manager_ = std::make_shared(main_service_, gcs_pub_sub_, gcs_table_storage_); // Initialize by gcs tables data. @@ -255,7 +254,7 @@ void GcsServer::StoreGcsServerAddressInRedis() { std::string address = ip + ":" + std::to_string(GetPort()); RAY_LOG(INFO) << "Gcs server address = " << address; - RAY_CHECK_OK(redis_gcs_client_->primary_context()->RunArgvAsync( + RAY_CHECK_OK(redis_client_->GetPrimaryContext()->RunArgvAsync( {"SET", "GcsServerAddress", address})); RAY_LOG(INFO) << "Finished setting gcs server address: " << address; } diff --git a/src/ray/gcs/gcs_server/gcs_server.h b/src/ray/gcs/gcs_server/gcs_server.h index a2082539f..1527ca7cf 100644 --- a/src/ray/gcs/gcs_server/gcs_server.h +++ b/src/ray/gcs/gcs_server/gcs_server.h @@ -21,7 +21,7 @@ #include "ray/gcs/gcs_server/gcs_resource_manager.h" #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/redis_client.h" #include "ray/rpc/client_call.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" #include "ray/rpc/node_manager/node_manager_client_pool.h" @@ -176,7 +176,7 @@ class GcsServer { /// Placement Group info handler and service std::unique_ptr placement_group_info_service_; /// Backend client - std::shared_ptr redis_gcs_client_; + std::shared_ptr redis_client_; /// A publisher for publishing gcs messages. std::shared_ptr gcs_pub_sub_; /// The gcs table storage. diff --git a/src/ray/gcs/gcs_server/gcs_worker_manager.h b/src/ray/gcs/gcs_server/gcs_worker_manager.h index 094e881e6..60001aa12 100644 --- a/src/ray/gcs/gcs_server/gcs_worker_manager.h +++ b/src/ray/gcs/gcs_server/gcs_worker_manager.h @@ -16,7 +16,6 @@ #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" namespace ray { diff --git a/src/ray/gcs/gcs_server/stats_handler_impl.h b/src/ray/gcs/gcs_server/stats_handler_impl.h index d9de7e40b..2e065c621 100644 --- a/src/ray/gcs/gcs_server/stats_handler_impl.h +++ b/src/ray/gcs/gcs_server/stats_handler_impl.h @@ -16,7 +16,6 @@ #include "ray/common/ray_config.h" #include "ray/gcs/gcs_server/gcs_table_storage.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" namespace ray { diff --git a/src/ray/gcs/gcs_server/task_info_handler_impl.h b/src/ray/gcs/gcs_server/task_info_handler_impl.h index 5a7599e8f..c32eb4894 100644 --- a/src/ray/gcs/gcs_server/task_info_handler_impl.h +++ b/src/ray/gcs/gcs_server/task_info_handler_impl.h @@ -16,7 +16,6 @@ #include "ray/gcs/gcs_server/gcs_table_storage.h" #include "ray/gcs/pubsub/gcs_pub_sub.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/rpc/gcs_server/gcs_rpc_server.h" namespace ray { diff --git a/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc index 700fdfc10..f6842d287 100644 --- a/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_object_manager_test.cc @@ -86,7 +86,6 @@ class GcsObjectManagerTest : public ::testing::Test { boost::asio::io_service io_service_; std::shared_ptr gcs_resource_manager_; std::shared_ptr gcs_node_manager_; - std::shared_ptr gcs_client_; std::shared_ptr gcs_pub_sub_; std::shared_ptr gcs_object_manager_; std::shared_ptr gcs_table_storage_; diff --git a/src/ray/gcs/redis_accessor.cc b/src/ray/gcs/redis_accessor.cc deleted file mode 100644 index 248eb9a89..000000000 --- a/src/ray/gcs/redis_accessor.cc +++ /dev/null @@ -1,697 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ray/gcs/redis_accessor.h" - -#include - -#include "ray/gcs/pb_util.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/util/logging.h" - -namespace ray { - -namespace gcs { - -RedisLogBasedActorInfoAccessor::RedisLogBasedActorInfoAccessor( - RedisGcsClient *client_impl) - : client_impl_(client_impl), - log_based_actor_sub_executor_(client_impl_->log_based_actor_table()) {} - -std::vector RedisLogBasedActorInfoAccessor::GetAllActorID() const { - return client_impl_->log_based_actor_table().GetAllActorID(); -} - -Status RedisLogBasedActorInfoAccessor::Get(const ActorID &actor_id, - ActorTableData *actor_table_data) const { - return client_impl_->log_based_actor_table().Get(actor_id, actor_table_data); -} - -Status RedisLogBasedActorInfoAccessor::GetAll( - std::vector *actor_table_data_list) { - RAY_CHECK(actor_table_data_list); - auto actor_id_list = GetAllActorID(); - actor_table_data_list->resize(actor_id_list.size()); - for (size_t i = 0; i < actor_id_list.size(); ++i) { - RAY_CHECK_OK(Get(actor_id_list[i], &(*actor_table_data_list)[i])); - } - return Status::OK(); -} - -Status RedisLogBasedActorInfoAccessor::AsyncGet( - const ActorID &actor_id, const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const ActorID &actor_id, - const std::vector &data) { - boost::optional result; - if (!data.empty()) { - result = data.back(); - } - callback(Status::OK(), result); - }; - - return client_impl_->log_based_actor_table().Lookup(actor_id.JobId(), actor_id, - on_done); -} - -Status RedisLogBasedActorInfoAccessor::AsyncRegisterActor( - const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) { - const std::string error_msg = - "Unsupported method of AsyncRegisterActor in RedisLogBasedActorInfoAccessor."; - RAY_LOG(FATAL) << error_msg; - return Status::Invalid(error_msg); -} - -Status RedisLogBasedActorInfoAccessor::AsyncCreateActor( - const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) { - const std::string error_msg = - "Unsupported method of AsyncCreateActor in " - "RedisLogBasedActorInfoAccessor."; - RAY_LOG(FATAL) << error_msg; - return Status::Invalid(error_msg); -} - -Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll( - const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return log_based_actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); -} - -Status RedisLogBasedActorInfoAccessor::AsyncSubscribe( - const ActorID &actor_id, const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return log_based_actor_sub_executor_.AsyncSubscribe(subscribe_id_, actor_id, subscribe, - done); -} - -Status RedisLogBasedActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) { - return log_based_actor_sub_executor_.AsyncUnsubscribe(subscribe_id_, actor_id, nullptr); -} - -RedisActorInfoAccessor::RedisActorInfoAccessor(RedisGcsClient *client_impl) - : RedisLogBasedActorInfoAccessor(client_impl), - actor_sub_executor_(client_impl_->actor_table()) {} - -std::vector RedisActorInfoAccessor::GetAllActorID() const { - return client_impl_->actor_table().GetAllActorID(); -} - -Status RedisActorInfoAccessor::Get(const ActorID &actor_id, - ActorTableData *actor_table_data) const { - return client_impl_->actor_table().Get(actor_id, actor_table_data); -} - -Status RedisActorInfoAccessor::AsyncGet( - const ActorID &actor_id, const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const ActorID &actor_id, - const ActorTableData &data) { callback(Status::OK(), data); }; - - auto on_failure = [callback](RedisGcsClient *client, const ActorID &actor_id) { - if (callback != nullptr) { - callback(Status::Invalid("Get actor failed."), boost::none); - } - }; - - return client_impl_->actor_table().Lookup(JobID::Nil(), actor_id, on_done, on_failure); -} - -Status RedisActorInfoAccessor::AsyncGetAll( - const MultiItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto actor_id_list = GetAllActorID(); - if (actor_id_list.empty()) { - callback(Status::OK(), std::vector()); - return Status::OK(); - } - - auto finished_count = std::make_shared(0); - auto result = std::make_shared>(); - int size = actor_id_list.size(); - for (auto &actor_id : actor_id_list) { - auto on_done = [finished_count, size, result, callback]( - const Status &status, - const boost::optional &data) { - ++(*finished_count); - if (data) { - result->push_back(*data); - } - if (*finished_count == size) { - callback(Status::OK(), *result); - } - }; - RAY_CHECK_OK(AsyncGet(actor_id, on_done)); - } - - return Status::OK(); -} - -Status RedisActorInfoAccessor::AsyncSubscribeAll( - const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return actor_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); -} - -Status RedisActorInfoAccessor::AsyncSubscribe( - const ActorID &actor_id, const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return actor_sub_executor_.AsyncSubscribe(subscribe_id_, actor_id, subscribe, done); -} - -Status RedisActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) { - return actor_sub_executor_.AsyncUnsubscribe(subscribe_id_, actor_id, nullptr); -} - -RedisJobInfoAccessor::RedisJobInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), job_sub_executor_(client_impl->job_table()) {} - -Status RedisJobInfoAccessor::AsyncAdd(const std::shared_ptr &data_ptr, - const StatusCallback &callback) { - return DoAsyncAppend(data_ptr, callback); -} - -Status RedisJobInfoAccessor::AsyncMarkFinished(const JobID &job_id, - const StatusCallback &callback) { - std::shared_ptr data_ptr = - CreateJobTableData(job_id, /*is_dead*/ true, /*time_stamp*/ std::time(nullptr), - /*driver_ip_address*/ "", /*driver_pid*/ -1); - return DoAsyncAppend(data_ptr, callback); -} - -Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr &data_ptr, - const StatusCallback &callback) { - JobTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const JobID &job_id, - const JobTableData &data) { callback(Status::OK()); }; - } - - JobID job_id = JobID::FromBinary(data_ptr->job_id()); - return client_impl_->job_table().Append(job_id, job_id, data_ptr, on_done); -} - -Status RedisJobInfoAccessor::AsyncSubscribeAll( - const SubscribeCallback &subscribe, const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return job_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); -} - -RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), - task_sub_executor_(client_impl->raylet_task_table()), - task_lease_sub_executor_(client_impl->task_lease_table()) {} - -Status RedisTaskInfoAccessor::AsyncAdd(const std::shared_ptr &data_ptr, - const StatusCallback &callback) { - raylet::TaskTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const TaskID &task_id, - const TaskTableData &data) { callback(Status::OK()); }; - } - - TaskID task_id = TaskID::FromBinary(data_ptr->task().task_spec().task_id()); - raylet::TaskTable &task_table = client_impl_->raylet_task_table(); - return task_table.Add(task_id.JobId(), task_id, data_ptr, on_done); -} - -Status RedisTaskInfoAccessor::AsyncGet( - const TaskID &task_id, const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_success = [callback](RedisGcsClient *client, const TaskID &task_id, - const TaskTableData &data) { - boost::optional result(data); - callback(Status::OK(), result); - }; - - auto on_failure = [callback](RedisGcsClient *client, const TaskID &task_id) { - boost::optional result; - callback(Status::Invalid("Task not exist."), result); - }; - - raylet::TaskTable &task_table = client_impl_->raylet_task_table(); - return task_table.Lookup(task_id.JobId(), task_id, on_success, on_failure); -} - -Status RedisTaskInfoAccessor::AsyncSubscribe( - const TaskID &task_id, const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return task_sub_executor_.AsyncSubscribe(subscribe_id_, task_id, subscribe, done); -} - -Status RedisTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) { - return task_sub_executor_.AsyncUnsubscribe(subscribe_id_, task_id, nullptr); -} - -Status RedisTaskInfoAccessor::AsyncAddTaskLease( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - TaskLeaseTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const TaskID &id, - const TaskLeaseData &data) { callback(Status::OK()); }; - } - TaskID task_id = TaskID::FromBinary(data_ptr->task_id()); - TaskLeaseTable &task_lease_table = client_impl_->task_lease_table(); - return task_lease_table.Add(task_id.JobId(), task_id, data_ptr, on_done); -} - -Status RedisTaskInfoAccessor::AsyncGetTaskLease( - const TaskID &task_id, const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_success = [callback](RedisGcsClient *client, const TaskID &task_id, - const TaskLeaseData &data) { - boost::optional result(data); - callback(Status::OK(), result); - }; - - auto on_failure = [callback](RedisGcsClient *client, const TaskID &task_id) { - boost::optional result; - callback(Status::Invalid("Task lease not exist."), result); - }; - - TaskLeaseTable &task_lease_table = client_impl_->task_lease_table(); - return task_lease_table.Lookup(task_id.JobId(), task_id, on_success, on_failure); -} - -Status RedisTaskInfoAccessor::AsyncSubscribeTaskLease( - const TaskID &task_id, - const SubscribeCallback> &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return task_lease_sub_executor_.AsyncSubscribe(subscribe_id_, task_id, subscribe, done); -} - -Status RedisTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &task_id) { - return task_lease_sub_executor_.AsyncUnsubscribe(subscribe_id_, task_id, nullptr); -} - -Status RedisTaskInfoAccessor::AttemptTaskReconstruction( - const std::shared_ptr &data_ptr, - const StatusCallback &callback) { - TaskReconstructionLog::WriteCallback on_success = nullptr; - TaskReconstructionLog::WriteCallback on_failure = nullptr; - if (callback != nullptr) { - on_success = [callback](RedisGcsClient *client, const TaskID &id, - const TaskReconstructionData &data) { - callback(Status::OK()); - }; - on_failure = [callback](RedisGcsClient *client, const TaskID &id, - const TaskReconstructionData &data) { - callback(Status::Invalid("Updating task reconstruction failed.")); - }; - } - - TaskID task_id = TaskID::FromBinary(data_ptr->task_id()); - int reconstruction_attempt = data_ptr->num_reconstructions(); - TaskReconstructionLog &task_reconstruction_log = - client_impl_->task_reconstruction_log(); - return task_reconstruction_log.AppendAt(task_id.JobId(), task_id, data_ptr, on_success, - on_failure, reconstruction_attempt); -} - -RedisObjectInfoAccessor::RedisObjectInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), object_sub_executor_(client_impl->object_table()) {} - -Status RedisObjectInfoAccessor::AsyncGetLocations( - const ObjectID &object_id, - const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const ObjectID &object_id, - const std::vector &data) { - rpc::ObjectLocationInfo info; - info.set_object_id(object_id.Binary()); - for (const auto &item : data) { - auto item_ptr = info.add_locations(); - item_ptr->CopyFrom(item); - } - callback(Status::OK(), info); - }; - - ObjectTable &object_table = client_impl_->object_table(); - return object_table.Lookup(object_id.TaskId().JobId(), object_id, on_done); -} - -Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id, - const NodeID &node_id, - const StatusCallback &callback) { - std::function - on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ObjectID &object_id, - const ObjectTableData &data) { callback(Status::OK()); }; - } - - std::shared_ptr data_ptr = std::make_shared(); - data_ptr->set_manager(node_id.Binary()); - - ObjectTable &object_table = client_impl_->object_table(); - return object_table.Add(object_id.TaskId().JobId(), object_id, data_ptr, on_done); -} - -Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id, - const NodeID &node_id, - const StatusCallback &callback) { - std::function - on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const ObjectID &object_id, - const ObjectTableData &data) { callback(Status::OK()); }; - } - - std::shared_ptr data_ptr = std::make_shared(); - data_ptr->set_manager(node_id.Binary()); - - ObjectTable &object_table = client_impl_->object_table(); - return object_table.Remove(object_id.TaskId().JobId(), object_id, data_ptr, on_done); -} - -Status RedisObjectInfoAccessor::AsyncSubscribeToLocations( - const ObjectID &object_id, - const SubscribeCallback> &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return object_sub_executor_.AsyncSubscribe( - subscribe_id_, object_id, - [subscribe](const ObjectID &id, const ObjectChangeNotification ¬ification_data) { - std::vector updates; - for (const auto &item : notification_data.GetData()) { - rpc::ObjectLocationChange update; - update.set_is_add(notification_data.IsAdded()); - update.set_node_id(item.manager()); - updates.push_back(update); - } - subscribe(id, updates); - }, - done); -} - -Status RedisObjectInfoAccessor::AsyncUnsubscribeToLocations(const ObjectID &object_id) { - return object_sub_executor_.AsyncUnsubscribe(subscribe_id_, object_id, nullptr); -} - -RedisNodeInfoAccessor::RedisNodeInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), - resource_usage_batch_sub_executor_(client_impl->resource_usage_batch_table()) {} - -Status RedisNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info, - const StatusCallback &callback) { - NodeTable &node_table = client_impl_->node_table(); - Status status = node_table.Connect(local_node_info); - if (callback != nullptr) { - callback(Status::OK()); - } - return status; -} - -Status RedisNodeInfoAccessor::UnregisterSelf() { - NodeTable &node_table = client_impl_->node_table(); - return node_table.Disconnect(); -} - -const NodeID &RedisNodeInfoAccessor::GetSelfId() const { - NodeTable &node_table = client_impl_->node_table(); - return node_table.GetLocalNodeId(); -} - -const GcsNodeInfo &RedisNodeInfoAccessor::GetSelfInfo() const { - NodeTable &node_table = client_impl_->node_table(); - return node_table.GetLocalNode(); -} - -Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info, - const StatusCallback &callback) { - NodeTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const NodeID &id, - const GcsNodeInfo &data) { callback(Status::OK()); }; - } - NodeTable &node_table = client_impl_->node_table(); - return node_table.MarkConnected(node_info, on_done); -} - -Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id, - const StatusCallback &callback) { - NodeTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const NodeID &id, - const GcsNodeInfo &data) { callback(Status::OK()); }; - } - NodeTable &node_table = client_impl_->node_table(); - return node_table.MarkDisconnected(node_id, on_done); -} - -Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - NodeTable &node_table = client_impl_->node_table(); - return node_table.SubscribeToNodeChange(subscribe, done); -} - -Status RedisNodeInfoAccessor::AsyncGetAll( - const MultiItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const NodeID &id, - const std::vector &data) { - std::vector result; - std::set node_ids; - for (int index = data.size() - 1; index >= 0; --index) { - if (node_ids.insert(data[index].node_id()).second) { - result.emplace_back(data[index]); - } - } - callback(Status::OK(), result); - }; - NodeTable &node_table = client_impl_->node_table(); - return node_table.Lookup(on_done); -} - -boost::optional RedisNodeInfoAccessor::Get(const NodeID &node_id, - bool filter_dead_nodes) const { - GcsNodeInfo node_info; - NodeTable &node_table = client_impl_->node_table(); - bool found = node_table.GetNode(node_id, &node_info); - boost::optional optional_node; - if (found) { - optional_node = std::move(node_info); - } - return optional_node; -} - -const std::unordered_map &RedisNodeInfoAccessor::GetAll() const { - NodeTable &node_table = client_impl_->node_table(); - return node_table.GetAllNodes(); -} - -bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const { - NodeTable &node_table = client_impl_->node_table(); - return node_table.IsRemoved(node_id); -} -Status RedisNodeInfoAccessor::AsyncReportHeartbeat( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - HeartbeatTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const NodeID &node_id, - const HeartbeatTableData &data) { callback(Status::OK()); }; - } - - NodeID node_id = NodeID::FromBinary(data_ptr->node_id()); - HeartbeatTable &heartbeat_table = client_impl_->heartbeat_table(); - return heartbeat_table.Add(JobID::Nil(), node_id, data_ptr, on_done); -} - -Status RedisNodeInfoAccessor::AsyncReportResourceUsage( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - return Status::Invalid("Not implemented"); -} - -void RedisNodeInfoAccessor::AsyncReReportResourceUsage() {} - -Status RedisNodeInfoAccessor::AsyncSubscribeBatchedResourceUsage( - const ItemCallback &subscribe, const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - auto on_subscribe = [subscribe](const NodeID &node_id, - const ResourceUsageBatchData &data) { - subscribe(data); - }; - - return resource_usage_batch_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, - done); -} - -RedisNodeResourceInfoAccessor::RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), resource_sub_executor_(client_impl_->resource_table()) {} - -Status RedisNodeResourceInfoAccessor::AsyncGetResources( - const NodeID &node_id, const OptionalItemCallback &callback) { - RAY_CHECK(callback != nullptr); - auto on_done = [callback](RedisGcsClient *client, const NodeID &id, - const ResourceMap &data) { - boost::optional result; - if (!data.empty()) { - result = data; - } - callback(Status::OK(), result); - }; - - DynamicResourceTable &resource_table = client_impl_->resource_table(); - return resource_table.Lookup(JobID::Nil(), node_id, on_done); -} - -Status RedisNodeResourceInfoAccessor::AsyncUpdateResources( - const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) { - Hash::HashCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const NodeID &node_id, - const ResourceMap &resources) { callback(Status::OK()); }; - } - - DynamicResourceTable &resource_table = client_impl_->resource_table(); - return resource_table.Update(JobID::Nil(), node_id, resources, on_done); -} - -Status RedisNodeResourceInfoAccessor::AsyncDeleteResources( - const NodeID &node_id, const std::vector &resource_names, - const StatusCallback &callback) { - Hash::HashRemoveCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const NodeID &node_id, - const std::vector &resource_names) { - callback(Status::OK()); - }; - } - - DynamicResourceTable &resource_table = client_impl_->resource_table(); - return resource_table.RemoveEntries(JobID::Nil(), node_id, resource_names, on_done); -} - -Status RedisNodeResourceInfoAccessor::AsyncSubscribeToResources( - const ItemCallback &subscribe, const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - auto on_subscribe = [subscribe](const NodeID &id, - const ResourceChangeNotification &result) { - rpc::NodeResourceChange node_resource_change; - node_resource_change.set_node_id(id.Binary()); - if (result.IsAdded()) { - for (auto &it : result.GetData()) { - (*node_resource_change.mutable_updated_resources())[it.first] = - it.second->resource_capacity(); - } - } else { - for (auto &it : result.GetData()) { - node_resource_change.add_deleted_resources(it.first); - } - } - subscribe(node_resource_change); - }; - return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), on_subscribe, done); -} - -RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient *client_impl) {} - -Status RedisErrorInfoAccessor::AsyncReportJobError( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - return Status::Invalid("Not implemented"); -} - -RedisStatsInfoAccessor::RedisStatsInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl) {} - -Status RedisStatsInfoAccessor::AsyncAddProfileData( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - ProfileTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const UniqueID &id, - const ProfileTableData &data) { callback(Status::OK()); }; - } - - ProfileTable &profile_table = client_impl_->profile_table(); - return profile_table.Append(JobID::Nil(), UniqueID::FromRandom(), data_ptr, on_done); -} - -RedisWorkerInfoAccessor::RedisWorkerInfoAccessor(RedisGcsClient *client_impl) - : client_impl_(client_impl), - worker_failure_sub_executor_(client_impl->worker_table()) {} - -Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures( - const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(subscribe != nullptr); - return worker_failure_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), subscribe, done); -} - -Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure( - const std::shared_ptr &data_ptr, const StatusCallback &callback) { - WorkerTable::WriteCallback on_done = nullptr; - if (callback != nullptr) { - on_done = [callback](RedisGcsClient *client, const WorkerID &id, - const WorkerTableData &data) { callback(Status::OK()); }; - } - - WorkerID worker_id = WorkerID::FromBinary(data_ptr->worker_address().worker_id()); - WorkerTable &worker_failure_table = client_impl_->worker_table(); - return worker_failure_table.Add(JobID::Nil(), worker_id, data_ptr, on_done); -} - -Status RedisWorkerInfoAccessor::AsyncGet( - const WorkerID &worker_id, - const OptionalItemCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisWorkerInfoAccessor::AsyncGetAll( - const MultiItemCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisWorkerInfoAccessor::AsyncAdd( - const std::shared_ptr &data_ptr, - const StatusCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisPlacementGroupInfoAccessor::AsyncCreatePlacementGroup( - const PlacementGroupSpecification &placement_group_spec) { - return Status::Invalid("Not implemented"); -} - -Status RedisPlacementGroupInfoAccessor::AsyncRemovePlacementGroup( - const PlacementGroupID &placement_group_id, const StatusCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisPlacementGroupInfoAccessor::AsyncGet( - const PlacementGroupID &placement_group_id, - const OptionalItemCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisPlacementGroupInfoAccessor::AsyncGetAll( - const MultiItemCallback &callback) { - return Status::Invalid("Not implemented"); -} - -Status RedisPlacementGroupInfoAccessor::AsyncWaitUntilReady( - const PlacementGroupID &placement_group_id, const StatusCallback &callback) { - return Status::Invalid("Not implemented"); -} - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/redis_accessor.h b/src/ray/gcs/redis_accessor.h deleted file mode 100644 index ec5d389f6..000000000 --- a/src/ray/gcs/redis_accessor.h +++ /dev/null @@ -1,491 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "ray/common/id.h" -#include "ray/common/task/task_spec.h" -#include "ray/gcs/accessor.h" -#include "ray/gcs/callback.h" -#include "ray/gcs/subscription_executor.h" -#include "ray/gcs/tables.h" - -namespace ray { - -namespace gcs { - -class RedisGcsClient; - -/// \class RedisLogBasedActorInfoAccessor -/// `RedisLogBasedActorInfoAccessor` is an implementation of `ActorInfoAccessor` -/// that uses Redis as the backend storage. -class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor { - public: - explicit RedisLogBasedActorInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisLogBasedActorInfoAccessor() {} - - Status GetAll(std::vector *actor_table_data_list) override; - - Status AsyncGet(const ActorID &actor_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override { - return Status::NotImplemented( - "RedisLogBasedActorInfoAccessor does not support AsyncGetAll."); - } - - Status AsyncGetByName(const std::string &name, - const OptionalItemCallback &callback) override { - return Status::NotImplemented( - "RedisLogBasedActorInfoAccessor does not support named detached actors."); - } - - Status AsyncRegisterActor(const TaskSpecification &task_spec, - const StatusCallback &callback) override; - - Status AsyncCreateActor(const TaskSpecification &task_spec, - const StatusCallback &callback) override; - - Status AsyncSubscribeAll(const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncSubscribe(const ActorID &actor_id, - const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncUnsubscribe(const ActorID &actor_id) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - bool IsActorUnsubscribed(const ActorID &actor_id) override { return false; } - - protected: - virtual std::vector GetAllActorID() const; - virtual Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const; - - RedisGcsClient *client_impl_{nullptr}; - // Use a random NodeID for actor subscription. Because: - // If we use NodeID::Nil, GCS will still send all actors' updates to this GCS Client. - // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local NodeID, so we use - // random NodeID instead. - // TODO(micafan): Remove this random id, once GCS becomes a service. - NodeID subscribe_id_{NodeID::FromRandom()}; - - private: - typedef SubscriptionExecutor - ActorSubscriptionExecutor; - ActorSubscriptionExecutor log_based_actor_sub_executor_; -}; - -/// \class RedisActorInfoAccessor -/// `RedisActorInfoAccessor` is an implementation of `ActorInfoAccessor` -/// that uses Redis as the backend storage. -class RedisActorInfoAccessor : public RedisLogBasedActorInfoAccessor { - public: - explicit RedisActorInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisActorInfoAccessor() {} - - Status AsyncGet(const ActorID &actor_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override; - - Status AsyncGetByName(const std::string &name, - const OptionalItemCallback &callback) override { - return Status::NotImplemented( - "RedisActorInfoAccessor does not support named detached actors."); - } - - Status AsyncSubscribeAll(const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncSubscribe(const ActorID &actor_id, - const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncUnsubscribe(const ActorID &actor_id) override; - - protected: - std::vector GetAllActorID() const override; - Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const override; - - private: - typedef SubscriptionExecutor - ActorSubscriptionExecutor; - ActorSubscriptionExecutor actor_sub_executor_; -}; - -/// \class RedisJobInfoAccessor -/// RedisJobInfoAccessor is an implementation of `JobInfoAccessor` -/// that uses Redis as the backend storage. -class RedisJobInfoAccessor : public JobInfoAccessor { - public: - explicit RedisJobInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisJobInfoAccessor() {} - - Status AsyncAdd(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override; - - Status AsyncSubscribeAll(const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override { - return Status::NotImplemented("AsyncGetAll not implemented"); - } - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - private: - /// Append job information to GCS asynchronously. - /// - /// \param data_ptr The job information that will be appended to GCS. - /// \param callback Callback that will be called after append done. - /// \return Status - Status DoAsyncAppend(const std::shared_ptr &data_ptr, - const StatusCallback &callback); - - RedisGcsClient *client_impl_{nullptr}; - - typedef SubscriptionExecutor JobSubscriptionExecutor; - JobSubscriptionExecutor job_sub_executor_; -}; - -/// \class RedisTaskInfoAccessor -/// `RedisTaskInfoAccessor` is an implementation of `TaskInfoAccessor` -/// that uses Redis as the backend storage. -class RedisTaskInfoAccessor : public TaskInfoAccessor { - public: - explicit RedisTaskInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisTaskInfoAccessor() {} - - Status AsyncAdd(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncGet(const TaskID &task_id, - const OptionalItemCallback &callback) override; - - Status AsyncSubscribe(const TaskID &task_id, - const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncUnsubscribe(const TaskID &task_id) override; - - Status AsyncAddTaskLease(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncGetTaskLease(const TaskID &task_id, - const OptionalItemCallback &callback) override; - - Status AsyncSubscribeTaskLease( - const TaskID &task_id, - const SubscribeCallback> &subscribe, - const StatusCallback &done) override; - - Status AsyncUnsubscribeTaskLease(const TaskID &task_id) override; - - Status AttemptTaskReconstruction( - const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - bool IsTaskUnsubscribed(const TaskID &task_id) override { return false; } - - bool IsTaskLeaseUnsubscribed(const TaskID &task_id) override { return false; } - - private: - RedisGcsClient *client_impl_{nullptr}; - // Use a random NodeID for task subscription. Because: - // If we use NodeID::Nil, GCS will still send all tasks' updates to this GCS Client. - // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local NodeID, so we use - // random NodeID instead. - // TODO(micafan): Remove this random id, once GCS becomes a service. - NodeID subscribe_id_{NodeID::FromRandom()}; - - typedef SubscriptionExecutor - TaskSubscriptionExecutor; - TaskSubscriptionExecutor task_sub_executor_; - - typedef SubscriptionExecutor, TaskLeaseTable> - TaskLeaseSubscriptionExecutor; - TaskLeaseSubscriptionExecutor task_lease_sub_executor_; -}; - -/// \class RedisObjectInfoAccessor -/// RedisObjectInfoAccessor is an implementation of `ObjectInfoAccessor` -/// that uses Redis as the backend storage. -class RedisObjectInfoAccessor : public ObjectInfoAccessor { - public: - explicit RedisObjectInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisObjectInfoAccessor() {} - - Status AsyncGetLocations( - const ObjectID &object_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAll( - const MultiItemCallback &callback) override { - return Status::NotImplemented("AsyncGetAll not implemented"); - } - - Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id, - const StatusCallback &callback) override; - - Status AsyncAddSpilledUrl(const ObjectID &object_id, const std::string &spilled_url, - const StatusCallback &callback) override { - return Status::NotImplemented("AsyncAddSpilledUrl not implemented"); - } - - Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id, - const StatusCallback &callback) override; - - Status AsyncSubscribeToLocations( - const ObjectID &object_id, - const SubscribeCallback> - &subscribe, - const StatusCallback &done) override; - - Status AsyncUnsubscribeToLocations(const ObjectID &object_id) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - bool IsObjectUnsubscribed(const ObjectID &object_id) override { return false; } - - private: - RedisGcsClient *client_impl_{nullptr}; - - // Use a random NodeID for object subscription. Because: - // If we use NodeID::Nil, GCS will still send all objects' updates to this GCS Client. - // Even we can filter out irrelevant updates, but there will be extra overhead. - // And because the new GCS Client will no longer hold the local NodeID, so we use - // random NodeID instead. - // TODO(micafan): Remove this random id, once GCS becomes a service. - NodeID subscribe_id_{NodeID::FromRandom()}; - - typedef SubscriptionExecutor - ObjectSubscriptionExecutor; - ObjectSubscriptionExecutor object_sub_executor_; -}; - -/// \class RedisNodeInfoAccessor -/// RedisNodeInfoAccessor is an implementation of `NodeInfoAccessor` -/// that uses Redis as the backend storage. -class RedisNodeInfoAccessor : public NodeInfoAccessor { - public: - explicit RedisNodeInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisNodeInfoAccessor() {} - - Status RegisterSelf(const GcsNodeInfo &local_node_info, - const StatusCallback &callback) override; - - Status UnregisterSelf() override; - - const NodeID &GetSelfId() const override; - - const GcsNodeInfo &GetSelfInfo() const override; - - Status AsyncRegister(const GcsNodeInfo &node_info, - const StatusCallback &callback) override; - - Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override; - - Status AsyncSubscribeToNodeChange( - const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - boost::optional Get(const NodeID &node_id, - bool filter_dead_nodes = true) const override; - - const std::unordered_map &GetAll() const override; - - bool IsRemoved(const NodeID &node_id) const override; - - Status AsyncReportHeartbeat(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncReportResourceUsage(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - void AsyncReReportResourceUsage() override; - - Status AsyncGetAllResourceUsage( - const ItemCallback &callback) override { - return Status::NotImplemented("AsyncGetAllResourceUsage not implemented"); - } - - Status AsyncSubscribeBatchedResourceUsage( - const ItemCallback &subscribe, - const StatusCallback &done) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - Status AsyncSetInternalConfig( - std::unordered_map &config) override { - return Status::NotImplemented("SetInternaConfig not implemented."); - } - - Status AsyncGetInternalConfig( - const OptionalItemCallback> &callback) - override { - return Status::NotImplemented("GetInternalConfig not implemented."); - } - - private: - RedisGcsClient *client_impl_{nullptr}; - - typedef SubscriptionExecutor - HeartbeatBatchSubscriptionExecutor; - HeartbeatBatchSubscriptionExecutor resource_usage_batch_sub_executor_; -}; - -/// \class RedisNodeResourceInfoAccessor -/// RedisNodeResourceInfoAccessor is an implementation of `NodeResourceInfoAccessor` -/// that uses Redis as the backend storage. -class RedisNodeResourceInfoAccessor : public NodeResourceInfoAccessor { - public: - explicit RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisNodeResourceInfoAccessor() {} - - Status AsyncGetResources(const NodeID &node_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAllAvailableResources( - const MultiItemCallback &callback) override { - return Status::NotImplemented("AsyncGetAllAvailableResources not implemented"); - } - - Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources, - const StatusCallback &callback) override; - - Status AsyncDeleteResources(const NodeID &node_id, - const std::vector &resource_names, - const StatusCallback &callback) override; - - Status AsyncSubscribeToResources(const ItemCallback &subscribe, - const StatusCallback &done) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - private: - RedisGcsClient *client_impl_{nullptr}; - - typedef SubscriptionExecutor - DynamicResourceSubscriptionExecutor; - DynamicResourceSubscriptionExecutor resource_sub_executor_; -}; - -/// \class RedisErrorInfoAccessor -/// RedisErrorInfoAccessor is an implementation of `ErrorInfoAccessor` -/// that uses Redis as the backend storage. -class RedisErrorInfoAccessor : public ErrorInfoAccessor { - public: - explicit RedisErrorInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisErrorInfoAccessor() = default; - - Status AsyncReportJobError(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; -}; - -/// \class RedisStatsInfoAccessor -/// RedisStatsInfoAccessor is an implementation of `StatsInfoAccessor` -/// that uses Redis as the backend storage. -class RedisStatsInfoAccessor : public StatsInfoAccessor { - public: - explicit RedisStatsInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisStatsInfoAccessor() = default; - - Status AsyncAddProfileData(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override { - return Status::NotImplemented("AsyncGetAll not implemented"); - } - - private: - RedisGcsClient *client_impl_{nullptr}; -}; - -/// \class RedisWorkerInfoAccessor -/// RedisWorkerInfoAccessor is an implementation of `WorkerInfoAccessor` -/// that uses Redis as the backend storage. -class RedisWorkerInfoAccessor : public WorkerInfoAccessor { - public: - explicit RedisWorkerInfoAccessor(RedisGcsClient *client_impl); - - virtual ~RedisWorkerInfoAccessor() = default; - - Status AsyncSubscribeToWorkerFailures( - const SubscribeCallback &subscribe, - const StatusCallback &done) override; - - Status AsyncReportWorkerFailure(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - Status AsyncGet(const WorkerID &worker_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAll(const MultiItemCallback &callback) override; - - Status AsyncAdd(const std::shared_ptr &data_ptr, - const StatusCallback &callback) override; - - void AsyncResubscribe(bool is_pubsub_server_restarted) override {} - - private: - RedisGcsClient *client_impl_{nullptr}; - - typedef SubscriptionExecutor - WorkerFailureSubscriptionExecutor; - WorkerFailureSubscriptionExecutor worker_failure_sub_executor_; -}; - -class RedisPlacementGroupInfoAccessor : public PlacementGroupInfoAccessor { - public: - virtual ~RedisPlacementGroupInfoAccessor() = default; - - Status AsyncCreatePlacementGroup( - const PlacementGroupSpecification &placement_group_spec) override; - - Status AsyncRemovePlacementGroup(const PlacementGroupID &placement_group_id, - const StatusCallback &callback) override; - - Status AsyncGet( - const PlacementGroupID &placement_group_id, - const OptionalItemCallback &callback) override; - - Status AsyncGetAll( - const MultiItemCallback &callback) override; - - Status AsyncWaitUntilReady(const PlacementGroupID &placement_group_id, - const StatusCallback &callback) override; -}; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/redis_gcs_client.cc b/src/ray/gcs/redis_gcs_client.cc deleted file mode 100644 index 1b2359346..000000000 --- a/src/ray/gcs/redis_gcs_client.cc +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ray/gcs/redis_gcs_client.h" - -#include "ray/common/ray_config.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_context.h" - -namespace ray { - -namespace gcs { - -RedisGcsClient::RedisGcsClient(const GcsClientOptions &options) - : RedisGcsClient(options, CommandType::kRegular) {} - -RedisGcsClient::RedisGcsClient(const GcsClientOptions &options, CommandType command_type) - : GcsClient(options), command_type_(command_type) { - RedisClientOptions redis_client_options(options.server_ip_, options.server_port_, - options.password_, options.is_test_client_); - redis_client_.reset(new RedisClient(redis_client_options)); -} - -Status RedisGcsClient::Connect(boost::asio::io_service &io_service) { - RAY_CHECK(!is_connected_); - - Status status = redis_client_->Connect(io_service); - if (!status.ok()) { - RAY_LOG(INFO) << "RedisGcsClient::Connect failed, status " << status.ToString(); - return status; - } - - std::shared_ptr primary_context = redis_client_->GetPrimaryContext(); - std::vector> shard_contexts = - redis_client_->GetShardContexts(); - - log_based_actor_table_.reset(new LogBasedActorTable({primary_context}, this)); - actor_table_.reset(new ActorTable({primary_context}, this)); - - // TODO(micafan) Modify NodeTable' Constructor(remove NodeID) in future. - // We will use NodeID instead of NodeID. - // For worker/driver, it might not have this field(NodeID). - // For raylet, NodeID should be initialized in raylet layer(not here). - node_table_.reset(new NodeTable({primary_context}, this)); - - job_table_.reset(new JobTable({primary_context}, this)); - resource_usage_batch_table_.reset(new ResourceUsageBatchTable({primary_context}, this)); - // Tables below would be sharded. - object_table_.reset(new ObjectTable(shard_contexts, this)); - raylet_task_table_.reset(new raylet::TaskTable(shard_contexts, this, command_type_)); - task_reconstruction_log_.reset(new TaskReconstructionLog(shard_contexts, this)); - task_lease_table_.reset(new TaskLeaseTable(shard_contexts, this)); - heartbeat_table_.reset(new HeartbeatTable(shard_contexts, this)); - profile_table_.reset(new ProfileTable(shard_contexts, this)); - resource_table_.reset(new DynamicResourceTable({primary_context}, this)); - worker_table_.reset(new WorkerTable(shard_contexts, this)); - - actor_accessor_.reset(new RedisActorInfoAccessor(this)); - - job_accessor_.reset(new RedisJobInfoAccessor(this)); - object_accessor_.reset(new RedisObjectInfoAccessor(this)); - node_accessor_.reset(new RedisNodeInfoAccessor(this)); - node_resource_accessor_.reset(new RedisNodeResourceInfoAccessor(this)); - task_accessor_.reset(new RedisTaskInfoAccessor(this)); - error_accessor_.reset(new RedisErrorInfoAccessor(this)); - stats_accessor_.reset(new RedisStatsInfoAccessor(this)); - worker_accessor_.reset(new RedisWorkerInfoAccessor(this)); - placement_group_accessor_.reset(new RedisPlacementGroupInfoAccessor()); - - is_connected_ = true; - - RAY_LOG(DEBUG) << "RedisGcsClient connected."; - - return Status::OK(); -} - -void RedisGcsClient::Disconnect() { - RAY_CHECK(is_connected_); - is_connected_ = false; - redis_client_->Disconnect(); - RAY_LOG(DEBUG) << "RedisGcsClient Disconnected."; -} - -std::string RedisGcsClient::DebugString() const { - std::stringstream result; - result << "RedisGcsClient:"; - result << "\n- TaskTable: " << raylet_task_table_->DebugString(); - result << "\n- LogBasedActorTable: " << log_based_actor_table_->DebugString(); - result << "\n- ActorTable: " << actor_table_->DebugString(); - result << "\n- TaskReconstructionLog: " << task_reconstruction_log_->DebugString(); - result << "\n- TaskLeaseTable: " << task_lease_table_->DebugString(); - result << "\n- HeartbeatTable: " << heartbeat_table_->DebugString(); - result << "\n- ProfileTable: " << profile_table_->DebugString(); - result << "\n- NodeTable: " << node_table_->DebugString(); - result << "\n- JobTable: " << job_table_->DebugString(); - return result.str(); -} - -ObjectTable &RedisGcsClient::object_table() { return *object_table_; } - -raylet::TaskTable &RedisGcsClient::raylet_task_table() { return *raylet_task_table_; } - -LogBasedActorTable &RedisGcsClient::log_based_actor_table() { - return *log_based_actor_table_; -} - -ActorTable &RedisGcsClient::actor_table() { return *actor_table_; } - -WorkerTable &RedisGcsClient::worker_table() { return *worker_table_; } - -TaskReconstructionLog &RedisGcsClient::task_reconstruction_log() { - return *task_reconstruction_log_; -} - -TaskLeaseTable &RedisGcsClient::task_lease_table() { return *task_lease_table_; } - -NodeTable &RedisGcsClient::node_table() { return *node_table_; } - -HeartbeatTable &RedisGcsClient::heartbeat_table() { return *heartbeat_table_; } - -ResourceUsageBatchTable &RedisGcsClient::resource_usage_batch_table() { - return *resource_usage_batch_table_; -} - -JobTable &RedisGcsClient::job_table() { return *job_table_; } - -ProfileTable &RedisGcsClient::profile_table() { return *profile_table_; } - -DynamicResourceTable &RedisGcsClient::resource_table() { return *resource_table_; } - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/redis_gcs_client.h b/src/ray/gcs/redis_gcs_client.h deleted file mode 100644 index 748b1da72..000000000 --- a/src/ray/gcs/redis_gcs_client.h +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include "ray/common/id.h" -#include "ray/common/status.h" -#include "ray/gcs/asio.h" -#include "ray/gcs/gcs_client.h" -#include "ray/gcs/redis_client.h" -#include "ray/gcs/tables.h" -#include "ray/util/logging.h" - -namespace ray { - -namespace gcs { - -class RedisContext; - -class RAY_EXPORT RedisGcsClient : public GcsClient { - public: - /// Constructor of RedisGcsClient. - /// Connect() must be called(and return ok) before you call any other methods. - /// TODO(micafan) To read and write from the GCS tables requires a further - /// call to Connect() to the client table. Will fix this in next pr. - /// - /// \param options Options of this client, e.g. server address, password and so on. - RedisGcsClient(const GcsClientOptions &options); - - /// This constructor is only used for testing. - /// Connect() must be called(and return ok) before you call any other methods. - /// - /// \param options Options of this client, e.g. server address, password and so on. - /// \param command_type The commands issued type. - RedisGcsClient(const GcsClientOptions &options, CommandType command_type); - - /// Connect to GCS Service. Non-thread safe. - /// Call this function before calling other functions. - /// - /// \param io_service The event loop for this client. - /// Must be single-threaded io_service (get more information from RedisAsioClient). - /// - /// \return Status - Status Connect(boost::asio::io_service &io_service) override; - - /// Disconnect with GCS Service. Non-thread safe. - void Disconnect() override; - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const override; - - // We also need something to export generic code to run on workers from the - // driver (to set the PYTHONPATH) - using GetExportCallback = std::function; - Status AddExport(const std::string &job_id, std::string &export_data); - Status GetExport(const std::string &job_id, int64_t export_index, - const GetExportCallback &done_callback); - - std::vector> shard_contexts() { - return redis_client_->GetShardContexts(); - } - - std::shared_ptr primary_context() { - return redis_client_->GetPrimaryContext(); - } - - std::shared_ptr GetRedisClient() const { return redis_client_; } - - /// The following xxx_table methods implement the Accessor interfaces. - /// Implements the Actors() interface. - LogBasedActorTable &log_based_actor_table(); - ActorTable &actor_table(); - /// Implements the Jobs() interface. - JobTable &job_table(); - /// Implements the Objects() interface. - ObjectTable &object_table(); - /// Implements the Nodes() interface. - NodeTable &node_table(); - HeartbeatTable &heartbeat_table(); - ResourceUsageBatchTable &resource_usage_batch_table(); - DynamicResourceTable &resource_table(); - /// Implements the Tasks() interface. - virtual raylet::TaskTable &raylet_task_table(); - TaskLeaseTable &task_lease_table(); - TaskReconstructionLog &task_reconstruction_log(); - /// Implements the Stats() interface. - ProfileTable &profile_table(); - /// Implements the Workers() interface. - WorkerTable &worker_table(); - - private: - // GCS command type. If CommandType::kChain, chain-replicated versions of the tables - // might be used, if available. - CommandType command_type_{CommandType::kUnknown}; - - std::shared_ptr redis_client_; - - std::unique_ptr object_table_; - std::unique_ptr raylet_task_table_; - std::unique_ptr log_based_actor_table_; - std::unique_ptr actor_table_; - std::unique_ptr task_reconstruction_log_; - std::unique_ptr task_lease_table_; - std::unique_ptr heartbeat_table_; - std::unique_ptr resource_usage_batch_table_; - std::unique_ptr profile_table_; - std::unique_ptr node_table_; - std::unique_ptr resource_table_; - std::unique_ptr worker_table_; - std::unique_ptr job_table_; -}; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/subscription_executor.cc b/src/ray/gcs/subscription_executor.cc deleted file mode 100644 index d9617985a..000000000 --- a/src/ray/gcs/subscription_executor.cc +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ray/gcs/subscription_executor.h" - -namespace ray { - -namespace gcs { - -template -Status SubscriptionExecutor::AsyncSubscribeAll( - const NodeID &node_id, const SubscribeCallback &subscribe, - const StatusCallback &done) { - // TODO(micafan) Optimize the lock when necessary. - // Consider avoiding locking in single-threaded processes. - std::unique_lock lock(mutex_); - - if (subscribe_all_callback_ != nullptr) { - RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to all elements."; - return Status::Invalid("Duplicate subscription!"); - } - - if (registration_status_ != RegistrationStatus::kNotRegistered) { - if (subscribe != nullptr) { - RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to specific elements" - ", can't subscribe to all elements."; - return Status::Invalid("Duplicate subscription!"); - } - } - - if (registration_status_ == RegistrationStatus::kRegistered) { - // Already registered to GCS, just invoke the `done` callback. - lock.unlock(); - if (done != nullptr) { - done(Status::OK()); - } - return Status::OK(); - } - - // Registration to GCS is not finished yet, add the `done` callback to the pending list - // to be invoked when registration is done. - if (done != nullptr) { - pending_subscriptions_.emplace_back(done); - } - - // If there's another registration request that's already on-going, then wait for it - // to finish. - if (registration_status_ == RegistrationStatus::kRegistering) { - return Status::OK(); - } - - auto on_subscribe = [this](RedisGcsClient *client, const ID &id, - const std::vector &result) { - if (result.empty()) { - return; - } - - SubscribeCallback sub_one_callback = nullptr; - SubscribeCallback sub_all_callback = nullptr; - { - std::unique_lock lock(mutex_); - const auto it = id_to_callback_map_.find(id); - if (it != id_to_callback_map_.end()) { - sub_one_callback = it->second; - } - sub_all_callback = subscribe_all_callback_; - } - if (sub_one_callback != nullptr) { - sub_one_callback(id, result.back()); - } - if (sub_all_callback != nullptr) { - RAY_CHECK(sub_one_callback == nullptr); - sub_all_callback(id, result.back()); - } - }; - - auto on_done = [this](RedisGcsClient *client) { - std::list pending_callbacks; - { - std::unique_lock lock(mutex_); - registration_status_ = RegistrationStatus::kRegistered; - pending_callbacks.swap(pending_subscriptions_); - RAY_CHECK(pending_subscriptions_.empty()); - } - - for (const auto &callback : pending_callbacks) { - callback(Status::OK()); - } - }; - - Status status = table_.Subscribe(JobID::Nil(), node_id, on_subscribe, on_done); - if (status.ok()) { - registration_status_ = RegistrationStatus::kRegistering; - subscribe_all_callback_ = subscribe; - } - - return status; -} - -template -Status SubscriptionExecutor::AsyncSubscribe( - const NodeID &node_id, const ID &id, const SubscribeCallback &subscribe, - const StatusCallback &done) { - RAY_CHECK(node_id != NodeID::Nil()); - - // NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts, - // thus we need to call `RequestNotifications` in the Subscribe callback to ensure - // it's processed after the `Subscribe` request. Otherwise if `RequestNotifications` - // is processed first we will miss the initial notification. - auto on_subscribe_done = [this, node_id, id, subscribe, done](Status status) { - auto on_request_notification_done = [this, done, id](Status status) { - if (!status.ok()) { - std::unique_lock lock(mutex_); - id_to_callback_map_.erase(id); - } - if (done != nullptr) { - done(status); - } - }; - - { - std::unique_lock lock(mutex_); - status = table_.RequestNotifications(JobID::Nil(), id, node_id, - on_request_notification_done); - if (!status.ok()) { - id_to_callback_map_.erase(id); - } - } - }; - - { - std::unique_lock lock(mutex_); - const auto it = id_to_callback_map_.find(id); - if (it != id_to_callback_map_.end()) { - RAY_LOG(DEBUG) << "Duplicate subscription to id " << id << " node_id " << node_id; - return Status::Invalid("Duplicate subscription to element!"); - } - id_to_callback_map_[id] = subscribe; - } - - auto status = AsyncSubscribeAll(node_id, nullptr, on_subscribe_done); - if (!status.ok()) { - std::unique_lock lock(mutex_); - id_to_callback_map_.erase(id); - } - return status; -} - -template -Status SubscriptionExecutor::AsyncUnsubscribe( - const NodeID &node_id, const ID &id, const StatusCallback &done) { - SubscribeCallback subscribe = nullptr; - { - std::unique_lock lock(mutex_); - const auto it = id_to_callback_map_.find(id); - if (it == id_to_callback_map_.end()) { - RAY_LOG(DEBUG) << "Invalid Unsubscribe! id " << id << " node_id " << node_id; - return Status::Invalid("Invalid Unsubscribe, no existing subscription found."); - } - subscribe = std::move(it->second); - id_to_callback_map_.erase(it); - } - - RAY_CHECK(subscribe != nullptr); - auto on_done = [this, id, subscribe, done](Status status) { - if (!status.ok()) { - std::unique_lock lock(mutex_); - const auto it = id_to_callback_map_.find(id); - if (it != id_to_callback_map_.end()) { - // The initial AsyncUnsubscribe deleted the callback, but the client - // has subscribed again in the meantime. This new callback will be - // called if we receive more notifications. - RAY_LOG(WARNING) - << "Client called AsyncSubscribe on " << id - << " while AsyncUnsubscribe was pending, but the unsubscribe failed."; - } else { - // The Unsubscribe failed, so restore the initial callback. - id_to_callback_map_[id] = subscribe; - } - } - if (done != nullptr) { - done(status); - } - }; - - return table_.CancelNotifications(JobID::Nil(), id, node_id, on_done); -} - -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor, - TaskLeaseTable>; -template class SubscriptionExecutor; -template class SubscriptionExecutor; -template class SubscriptionExecutor; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/subscription_executor.h b/src/ray/gcs/subscription_executor.h deleted file mode 100644 index 48a912f3e..000000000 --- a/src/ray/gcs/subscription_executor.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include "ray/gcs/callback.h" -#include "ray/gcs/tables.h" - -namespace ray { - -namespace gcs { - -/// \class SubscriptionExecutor -/// SubscriptionExecutor class encapsulates the implementation details of -/// subscribe/unsubscribe to elements (e.g.: actors or tasks or objects or nodes). -/// Support subscribing to a specific element or subscribing to all elements. -template -class SubscriptionExecutor { - public: - explicit SubscriptionExecutor(Table &table) : table_(table) {} - - ~SubscriptionExecutor() {} - - /// Subscribe to operations of all elements. - /// Repeated subscription will return a failure. - /// - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each update will be received. Else, only - /// messages for the given node will be received. - /// \param subscribe Callback that will be called each time when an element - /// is registered or updated. - /// \param done Callback that will be called when subscription is complete. - /// \return Status - Status AsyncSubscribeAll(const NodeID &node_id, - const SubscribeCallback &subscribe, - const StatusCallback &done); - - /// Subscribe to operations of an element. - /// Repeated subscription to an element will return a failure. - /// - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each update will be received. Else, only - /// messages for the given node will be received. - /// \param id The id of the element to be subscribe to. - /// \param subscribe Callback that will be called each time when the element - /// is registered or updated. - /// \param done Callback that will be called when subscription is complete. - /// \return Status - Status AsyncSubscribe(const NodeID &node_id, const ID &id, - const SubscribeCallback &subscribe, - const StatusCallback &done); - - /// Cancel subscription to an element. - /// Unsubscribing can only be called after the subscription request is completed. - /// - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each update will be received. Else, only - /// messages for the given node will be received. - /// \param id The id of the element to be unsubscribed to. - /// \param done Callback that will be called when cancel subscription is complete. - /// \return Status - Status AsyncUnsubscribe(const NodeID &node_id, const ID &id, - const StatusCallback &done); - - private: - Table &table_; - - std::mutex mutex_; - - enum class RegistrationStatus : uint8_t { - kNotRegistered, - kRegistering, - kRegistered, - }; - - /// Whether successfully registered subscription to GCS. - RegistrationStatus registration_status_{RegistrationStatus::kNotRegistered}; - - /// List of subscriptions before registration to GCS is done, these callbacks - /// will be called when the registration to GCS finishes. - std::list pending_subscriptions_; - - /// Subscribe Callback of all elements. - SubscribeCallback subscribe_all_callback_{nullptr}; - - /// A mapping from element ID to subscription callback. - typedef std::unordered_map> IDToCallbackMap; - IDToCallbackMap id_to_callback_map_; -}; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/tables.cc b/src/ray/gcs/tables.cc deleted file mode 100644 index 2017d05de..000000000 --- a/src/ray/gcs/tables.cc +++ /dev/null @@ -1,847 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ray/gcs/tables.h" - -#include "absl/time/clock.h" -#include "ray/common/common_protocol.h" -#include "ray/common/grpc_util.h" -#include "ray/common/ray_config.h" -#include "ray/gcs/redis_gcs_client.h" - -extern "C" { -#include "hiredis/hiredis.h" -} - -namespace { - -static const std::string kTableAppendCommand = "RAY.TABLE_APPEND"; -static const std::string kChainTableAppendCommand = "RAY.CHAIN.TABLE_APPEND"; - -static const std::string kTableAddCommand = "RAY.TABLE_ADD"; -static const std::string kChainTableAddCommand = "RAY.CHAIN.TABLE_ADD"; - -std::string GetLogAppendCommand(const ray::gcs::CommandType command_type) { - if (command_type == ray::gcs::CommandType::kRegular) { - return kTableAppendCommand; - } else { - RAY_CHECK(command_type == ray::gcs::CommandType::kChain); - return kChainTableAppendCommand; - } -} - -std::string GetTableAddCommand(const ray::gcs::CommandType command_type) { - if (command_type == ray::gcs::CommandType::kRegular) { - return kTableAddCommand; - } else { - RAY_CHECK(command_type == ray::gcs::CommandType::kChain); - return kChainTableAddCommand; - } -} - -} // namespace - -namespace ray { - -namespace gcs { - -template -Status Log::Append(const JobID &job_id, const ID &id, - const std::shared_ptr &data, - const WriteCallback &done) { - num_appends_++; - auto callback = [this, id, data, done](std::shared_ptr reply) { - const auto status = reply->ReadAsStatus(); - // Failed to append the entry. - RAY_CHECK(status.ok()) << "Failed to execute command TABLE_APPEND:" - << status.ToString(); - if (done != nullptr) { - (done)(client_, id, *data); - } - }; - std::string str = data->SerializeAsString(); - return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id, str.data(), - str.length(), prefix_, pubsub_channel_, - std::move(callback)); -} - -template -Status Log::SyncAppend(const JobID &job_id, const ID &id, - const std::shared_ptr &data) { - num_appends_++; - std::string str = data->SerializeAsString(); - auto reply = - GetRedisContext(id)->RunSync(GetLogAppendCommand(command_type_), id, str.data(), - str.length(), prefix_, pubsub_channel_); - Status status = reply ? reply->ReadAsStatus() : Status::RedisError("Redis error"); - return status; -} - -template -Status Log::AppendAt(const JobID &job_id, const ID &id, - const std::shared_ptr &data, - const WriteCallback &done, const WriteCallback &failure, - int log_length) { - num_appends_++; - auto callback = [this, id, data, done, failure](std::shared_ptr reply) { - const auto status = reply->ReadAsStatus(); - if (status.ok()) { - if (done != nullptr) { - (done)(client_, id, *data); - } - } else { - if (failure != nullptr) { - (failure)(client_, id, *data); - } - } - }; - std::string str = data->SerializeAsString(); - return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id, str.data(), - str.length(), prefix_, pubsub_channel_, - std::move(callback), log_length); -} - -template -Status Log::Lookup(const JobID &job_id, const ID &id, const Callback &lookup) { - num_lookups_++; - auto callback = [this, id, lookup](std::shared_ptr reply) { - if (lookup != nullptr) { - std::vector results; - if (!reply->IsNil()) { - GcsEntry gcs_entry; - gcs_entry.ParseFromString(reply->ReadAsString()); - RAY_CHECK(ID::FromBinary(gcs_entry.id()) == id); - for (int64_t i = 0; i < gcs_entry.entries_size(); i++) { - Data data; - data.ParseFromString(gcs_entry.entries(i)); - results.emplace_back(std::move(data)); - } - } - lookup(client_, id, results); - } - }; - std::vector nil; - return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -Status Log::Subscribe(const JobID &job_id, const NodeID &node_id, - const Callback &subscribe, - const SubscriptionCallback &done) { - auto subscribe_wrapper = [subscribe](RedisGcsClient *client, const ID &id, - const GcsChangeMode change_mode, - const std::vector &data) { - RAY_CHECK(change_mode != GcsChangeMode::REMOVE); - subscribe(client, id, data); - }; - return Subscribe(job_id, node_id, subscribe_wrapper, done); -} - -template -Status Log::Subscribe(const JobID &job_id, const NodeID &node_id, - const NotificationCallback &subscribe, - const SubscriptionCallback &done) { - RAY_CHECK(subscribe_callback_index_ == -1) - << "Client called Subscribe twice on the same table"; - auto callback = [this, subscribe, done](std::shared_ptr reply) { - const auto data = reply->ReadAsPubsubData(); - - if (data.empty()) { - // No notification data is provided. This is the callback for the - // initial subscription request. - if (done != nullptr) { - done(client_); - } - } else { - // Data is provided. This is the callback for a message. - if (subscribe != nullptr) { - // Parse the notification. - GcsEntry gcs_entry; - gcs_entry.ParseFromString(data); - ID id = ID::FromBinary(gcs_entry.id()); - std::vector results; - for (int64_t i = 0; i < gcs_entry.entries_size(); i++) { - Data result; - result.ParseFromString(gcs_entry.entries(i)); - results.emplace_back(std::move(result)); - } - subscribe(client_, id, gcs_entry.change_mode(), results); - } - } - }; - - subscribe_callback_index_ = 1; - for (auto &context : shard_contexts_) { - RAY_RETURN_NOT_OK(context->SubscribeAsync(node_id, pubsub_channel_, callback, - &subscribe_callback_index_)); - } - return Status::OK(); -} - -template -Status Log::RequestNotifications(const JobID &job_id, const ID &id, - const NodeID &node_id, - const StatusCallback &done) { - RAY_CHECK(subscribe_callback_index_ >= 0) - << "Client requested notifications on a key before Subscribe completed"; - - RedisCallback callback = nullptr; - if (done != nullptr) { - callback = [done](std::shared_ptr reply) { - const auto status = reply->IsNil() - ? Status::OK() - : Status::RedisError("request notifications failed."); - done(status); - }; - } - - return GetRedisContext(id)->RunAsync("RAY.TABLE_REQUEST_NOTIFICATIONS", id, - node_id.Data(), node_id.Size(), prefix_, - pubsub_channel_, callback); -} - -template -Status Log::CancelNotifications(const JobID &job_id, const ID &id, - const NodeID &node_id, - const StatusCallback &done) { - RAY_CHECK(subscribe_callback_index_ >= 0) - << "Client canceled notifications on a key before Subscribe completed"; - - RedisCallback callback = nullptr; - if (done != nullptr) { - callback = [done](std::shared_ptr reply) { - const auto status = reply->ReadAsStatus(); - done(status); - }; - } - - return GetRedisContext(id)->RunAsync("RAY.TABLE_CANCEL_NOTIFICATIONS", id, - node_id.Data(), node_id.Size(), prefix_, - pubsub_channel_, callback); -} - -template -void Log::Delete(const JobID &job_id, const std::vector &ids) { - if (ids.empty()) { - return; - } - std::unordered_map sharded_data; - for (const auto &id : ids) { - sharded_data[GetRedisContext(id).get()] << id.Binary(); - } - // Breaking really large deletion commands into batches of smaller size. - const size_t batch_size = - RayConfig::instance().maximum_gcs_deletion_batch_size() * ID::Size(); - for (const auto &pair : sharded_data) { - std::string current_data = pair.second.str(); - for (size_t cur = 0; cur < pair.second.str().size(); cur += batch_size) { - size_t data_field_size = std::min(batch_size, current_data.size() - cur); - uint16_t id_count = data_field_size / ID::Size(); - // Send data contains id count and all the id data. - std::string send_data(data_field_size + sizeof(id_count), 0); - uint8_t *buffer = reinterpret_cast(&send_data[0]); - *reinterpret_cast(buffer) = id_count; - RAY_IGNORE_EXPR( - std::copy_n(reinterpret_cast(current_data.c_str() + cur), - data_field_size, buffer + sizeof(uint16_t))); - - RAY_IGNORE_EXPR( - pair.first->RunAsync("RAY.TABLE_DELETE", UniqueID::Nil(), - reinterpret_cast(send_data.c_str()), - send_data.size(), prefix_, pubsub_channel_, - /*redisCallback=*/nullptr)); - } - } -} - -template -void Log::Delete(const JobID &job_id, const ID &id) { - Delete(job_id, std::vector({id})); -} - -template -std::string Log::DebugString() const { - std::stringstream result; - result << "num lookups: " << num_lookups_ << ", num appends: " << num_appends_; - return result.str(); -} - -template -Status Table::Add(const JobID &job_id, const ID &id, - const std::shared_ptr &data, - const WriteCallback &done) { - num_adds_++; - auto callback = [this, id, data, done](std::shared_ptr reply) { - if (done != nullptr) { - (done)(client_, id, *data); - } - }; - std::string str = data->SerializeAsString(); - return GetRedisContext(id)->RunAsync(GetTableAddCommand(command_type_), id, str.data(), - str.length(), prefix_, pubsub_channel_, - std::move(callback)); -} - -template -Status Table::Lookup(const JobID &job_id, const ID &id, const Callback &lookup, - const FailureCallback &failure) { - num_lookups_++; - return Log::Lookup(job_id, id, - [lookup, failure](RedisGcsClient *client, const ID &id, - const std::vector &data) { - if (data.empty()) { - if (failure != nullptr) { - (failure)(client, id); - } - } else { - RAY_CHECK(data.size() == 1); - if (lookup != nullptr) { - (lookup)(client, id, data[0]); - } - } - }); -} - -template -Status Table::Subscribe(const JobID &job_id, const NodeID &node_id, - const Callback &subscribe, - const FailureCallback &failure, - const SubscriptionCallback &done) { - return Log::Subscribe( - job_id, node_id, - [subscribe, failure](RedisGcsClient *client, const ID &id, - const std::vector &data) { - RAY_CHECK(data.empty() || data.size() == 1); - if (data.size() == 1) { - subscribe(client, id, data[0]); - } else { - if (failure != nullptr) { - failure(client, id); - } - } - }, - done); -} - -template -Status Table::Subscribe(const JobID &job_id, const NodeID &node_id, - const Callback &subscribe, - const SubscriptionCallback &done) { - return Subscribe(job_id, node_id, subscribe, /*failure*/ nullptr, done); -} - -template -std::string Table::DebugString() const { - std::stringstream result; - result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_; - return result.str(); -} - -template -Status Set::Add(const JobID &job_id, const ID &id, - const std::shared_ptr &data, const WriteCallback &done) { - num_adds_++; - auto callback = [this, id, data, done](std::shared_ptr reply) { - if (done != nullptr) { - (done)(client_, id, *data); - } - }; - std::string str = data->SerializeAsString(); - return GetRedisContext(id)->RunAsync("RAY.SET_ADD", id, str.data(), str.length(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -Status Set::Remove(const JobID &job_id, const ID &id, - const std::shared_ptr &data, - const WriteCallback &done) { - num_removes_++; - auto callback = [this, id, data, done](std::shared_ptr reply) { - if (done != nullptr) { - (done)(client_, id, *data); - } - }; - std::string str = data->SerializeAsString(); - return GetRedisContext(id)->RunAsync("RAY.SET_REMOVE", id, str.data(), str.length(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -Status Set::Subscribe(const JobID &job_id, const NodeID &node_id, - const NotificationCallback &subscribe, - const SubscriptionCallback &done) { - auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id, - const GcsChangeMode change_mode, - const std::vector &data) { - ArrayNotification change_notification(change_mode, data); - std::vector> notification_vec; - notification_vec.emplace_back(std::move(change_notification)); - subscribe(client, id, notification_vec); - }; - return Log::Subscribe(job_id, node_id, on_subscribe, done); -} - -template -std::string Set::DebugString() const { - std::stringstream result; - result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_ - << ", num removes: " << num_removes_; - return result.str(); -} - -template -Status Hash::Update(const JobID &job_id, const ID &id, const DataMap &data_map, - const HashCallback &done) { - num_adds_++; - auto callback = [this, id, data_map, done](std::shared_ptr reply) { - if (done != nullptr) { - (done)(client_, id, data_map); - } - }; - GcsEntry gcs_entry; - gcs_entry.set_id(id.Binary()); - gcs_entry.set_change_mode(GcsChangeMode::APPEND_OR_ADD); - for (const auto &pair : data_map) { - gcs_entry.add_entries(pair.first); - gcs_entry.add_entries(pair.second->SerializeAsString()); - } - std::string str = gcs_entry.SerializeAsString(); - return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, str.data(), str.size(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -Status Hash::RemoveEntries(const JobID &job_id, const ID &id, - const std::vector &keys, - const HashRemoveCallback &remove_callback) { - num_removes_++; - auto callback = [this, id, keys, - remove_callback](std::shared_ptr reply) { - if (remove_callback != nullptr) { - (remove_callback)(client_, id, keys); - } - }; - GcsEntry gcs_entry; - gcs_entry.set_id(id.Binary()); - gcs_entry.set_change_mode(GcsChangeMode::REMOVE); - for (const auto &key : keys) { - gcs_entry.add_entries(key); - } - std::string str = gcs_entry.SerializeAsString(); - return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, str.data(), str.size(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -std::string Hash::DebugString() const { - std::stringstream result; - result << "num lookups: " << num_lookups_ << ", num adds: " << num_adds_ - << ", num removes: " << num_removes_; - return result.str(); -} - -template -Status Hash::Lookup(const JobID &job_id, const ID &id, - const HashCallback &lookup) { - num_lookups_++; - auto callback = [this, id, lookup](std::shared_ptr reply) { - if (lookup != nullptr) { - DataMap results; - if (!reply->IsNil()) { - const auto data = reply->ReadAsString(); - GcsEntry gcs_entry; - gcs_entry.ParseFromString(reply->ReadAsString()); - RAY_CHECK(ID::FromBinary(gcs_entry.id()) == id); - RAY_CHECK(gcs_entry.entries_size() % 2 == 0); - for (int i = 0; i < gcs_entry.entries_size(); i += 2) { - const auto &key = gcs_entry.entries(i); - const auto value = std::make_shared(); - value->ParseFromString(gcs_entry.entries(i + 1)); - results.emplace(key, std::move(value)); - } - } - lookup(client_, id, results); - } - }; - std::vector nil; - return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(), - prefix_, pubsub_channel_, std::move(callback)); -} - -template -Status Hash::Subscribe(const JobID &job_id, const NodeID &node_id, - const HashNotificationCallback &subscribe, - const SubscriptionCallback &done) { - RAY_CHECK(subscribe_callback_index_ == -1) - << "Client called Subscribe twice on the same table"; - auto callback = [this, subscribe, done](std::shared_ptr reply) { - const auto data = reply->ReadAsPubsubData(); - if (data.empty()) { - // No notification data is provided. This is the callback for the - // initial subscription request. - if (done != nullptr) { - done(client_); - } - } else { - // Data is provided. This is the callback for a message. - if (subscribe != nullptr) { - // Parse the notification. - GcsEntry gcs_entry; - gcs_entry.ParseFromString(data); - ID id = ID::FromBinary(gcs_entry.id()); - DataMap data_map; - if (gcs_entry.change_mode() == GcsChangeMode::REMOVE) { - for (const auto &key : gcs_entry.entries()) { - data_map.emplace(key, std::shared_ptr()); - } - } else { - RAY_CHECK(gcs_entry.entries_size() % 2 == 0); - for (int i = 0; i < gcs_entry.entries_size(); i += 2) { - const auto &key = gcs_entry.entries(i); - const auto value = std::make_shared(); - value->ParseFromString(gcs_entry.entries(i + 1)); - data_map.emplace(key, std::move(value)); - } - } - MapNotification notification(gcs_entry.change_mode(), - data_map); - std::vector> notification_vec; - notification_vec.emplace_back(std::move(notification)); - subscribe(client_, id, notification_vec); - } - } - }; - - subscribe_callback_index_ = 1; - for (auto &context : shard_contexts_) { - RAY_RETURN_NOT_OK(context->SubscribeAsync(node_id, pubsub_channel_, callback, - &subscribe_callback_index_)); - } - return Status::OK(); -} - -std::string ProfileTable::DebugString() const { - return Log::DebugString(); -} - -void NodeTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback) { - RAY_CHECK(node_change_callback_ == nullptr); - node_change_callback_ = callback; - // Call the callback for any added clients that are cached. - for (const auto &entry : node_cache_) { - if (!entry.first.IsNil()) { - RAY_CHECK(entry.second.state() == GcsNodeInfo::ALIVE || - entry.second.state() == GcsNodeInfo::DEAD); - node_change_callback_(entry.first, entry.second); - } - } -} - -void NodeTable::HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info) { - NodeID node_id = NodeID::FromBinary(node_info.node_id()); - bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE); - // It's possible to get duplicate notifications from the client table, so - // check whether this notification is new. - auto entry = node_cache_.find(node_id); - bool is_notif_new; - if (entry == node_cache_.end()) { - // If the entry is not in the cache, then the notification is new. - is_notif_new = true; - } else { - // If the entry is in the cache, then the notification is new if the client - // was alive and is now dead or resources have been updated. - bool was_alive = (entry->second.state() == GcsNodeInfo::ALIVE); - is_notif_new = was_alive && !is_alive; - // Once a node with a given ID has been removed, it should never be added - // again. If the entry was in the cache and the node was deleted, check - // that this new notification is not an insertion. - if (!was_alive) { - RAY_CHECK(!is_alive) - << "Notification for addition of a node that was already removed:" << node_id; - } - } - - // Add the notification to our cache. Notifications are idempotent. - RAY_LOG(DEBUG) << "[NodeTableNotification] NodeTable Insertion/Deletion " - "notification for node id " - << node_id << ". IsAlive: " << is_alive - << ". Setting the node cache to data."; - node_cache_[node_id] = node_info; - - // If the notification is new, call any registered callbacks. - GcsNodeInfo &cache_data = node_cache_[node_id]; - if (is_notif_new) { - if (is_alive) { - RAY_CHECK(removed_nodes_.find(node_id) == removed_nodes_.end()); - } else { - // NOTE(swang): The node should be added to this data structure before - // the callback gets called, in case the callback depends on the data - // structure getting updated. - removed_nodes_.insert(node_id); - } - if (node_change_callback_ != nullptr) { - node_change_callback_(node_id, cache_data); - } - } -} - -const NodeID &NodeTable::GetLocalNodeId() const { - RAY_CHECK(!local_node_id_.IsNil()); - return local_node_id_; -} - -const GcsNodeInfo &NodeTable::GetLocalNode() const { return local_node_info_; } - -bool NodeTable::IsRemoved(const NodeID &node_id) const { - return removed_nodes_.count(node_id) == 1; -} - -Status NodeTable::Connect(const GcsNodeInfo &local_node_info) { - RAY_CHECK(!disconnected_) << "Tried to reconnect a disconnected node."; - RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected."; - RAY_CHECK(local_node_info.state() == GcsNodeInfo::ALIVE); - - auto node_info_ptr = std::make_shared(local_node_info); - Status status = SyncAppend(JobID::Nil(), node_log_key_, node_info_ptr); - if (status.ok()) { - local_node_id_ = NodeID::FromBinary(local_node_info.node_id()); - local_node_info_ = local_node_info; - } - return status; -} - -Status NodeTable::Disconnect() { - local_node_info_.set_state(GcsNodeInfo::DEAD); - auto node_info_ptr = std::make_shared(local_node_info_); - Status status = SyncAppend(JobID::Nil(), node_log_key_, node_info_ptr); - - if (status.ok()) { - // We successfully added the deletion entry. Mark ourselves as disconnected. - disconnected_ = true; - } - return status; -} - -ray::Status NodeTable::MarkConnected(const GcsNodeInfo &node_info, - const WriteCallback &done) { - RAY_CHECK(node_info.state() == GcsNodeInfo::ALIVE); - auto node_info_ptr = std::make_shared(node_info); - return Append(JobID::Nil(), node_log_key_, node_info_ptr, done); -} - -ray::Status NodeTable::MarkDisconnected(const NodeID &dead_node_id, - const WriteCallback &done) { - auto node_info = std::make_shared(); - node_info->set_node_id(dead_node_id.Binary()); - node_info->set_state(GcsNodeInfo::DEAD); - return Append(JobID::Nil(), node_log_key_, node_info, done); -} - -ray::Status NodeTable::SubscribeToNodeChange( - const SubscribeCallback &subscribe, const StatusCallback &done) { - // Callback for a notification from the client table. - auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key, - const std::vector ¬ifications) { - RAY_CHECK(log_key == node_log_key_); - std::unordered_map connected_nodes; - std::unordered_map disconnected_nodes; - for (auto ¬ification : notifications) { - // This is temporary fix for Issue 4140 to avoid connect to dead nodes. - // TODO(yuhguo): remove this temporary fix after GCS entry is removable. - if (notification.state() == GcsNodeInfo::ALIVE) { - connected_nodes.emplace(notification.node_id(), notification); - } else { - auto iter = connected_nodes.find(notification.node_id()); - if (iter != connected_nodes.end()) { - connected_nodes.erase(iter); - } - disconnected_nodes.emplace(notification.node_id(), notification); - } - } - for (const auto &pair : connected_nodes) { - HandleNotification(client, pair.second); - } - for (const auto &pair : disconnected_nodes) { - HandleNotification(client, pair.second); - } - }; - - // Callback to request notifications from the client table once we've - // successfully subscribed. - auto on_done = [this, subscribe, done](RedisGcsClient *client) { - auto on_request_notification_done = [this, subscribe, done](Status status) { - RAY_CHECK_OK(status); - if (done != nullptr) { - done(status); - } - // Register node change callbacks after RequestNotification finishes. - RegisterNodeChangeCallback(subscribe); - }; - RAY_CHECK_OK(RequestNotifications(JobID::Nil(), node_log_key_, subscribe_id_, - on_request_notification_done)); - }; - - // Subscribe to the client table. - return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done); -} - -bool NodeTable::GetNode(const NodeID &node_id, GcsNodeInfo *node_info) const { - RAY_CHECK(!node_id.IsNil()); - auto entry = node_cache_.find(node_id); - auto found = (entry != node_cache_.end()); - if (found) { - *node_info = entry->second; - } - return found; -} - -const std::unordered_map &NodeTable::GetAllNodes() const { - return node_cache_; -} - -Status NodeTable::Lookup(const Callback &lookup) { - RAY_CHECK(lookup != nullptr); - return Log::Lookup(JobID::Nil(), node_log_key_, lookup); -} - -std::string NodeTable::DebugString() const { - std::stringstream result; - result << Log::DebugString(); - result << ", cache size: " << node_cache_.size() - << ", num removed: " << removed_nodes_.size(); - return result.str(); -} - -Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &node_id, - const Callback &subscribe, - const SubscriptionCallback &done) { - auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id, - const std::vector &data) { - std::vector> result; - for (const auto &item : data) { - boost::optional optional_item(item); - result.emplace_back(std::move(optional_item)); - } - if (result.empty()) { - boost::optional optional_item; - result.emplace_back(std::move(optional_item)); - } - subscribe(client, task_id, result); - }; - return Table::Subscribe(job_id, node_id, on_subscribe, done); -} - -std::vector SyncGetAllActorID(redisContext *redis_context, - const std::string &table_prefix) { - std::unordered_set actor_id_set; - size_t cursor = 0; - do { - auto r = redisCommand(redis_context, "SCAN %d match %s* count 100", cursor, - table_prefix.c_str()); - auto reply = reinterpret_cast(r); - RAY_CHECK(reply != nullptr && reply->type == REDIS_REPLY_ARRAY); - RAY_CHECK(reply->elements == 2); - - // current cursor - redisReply *cursor_reply = reply->element[0]; - RAY_CHECK(cursor_reply != nullptr && cursor_reply->type == REDIS_REPLY_STRING); - cursor = std::stoi(std::string(cursor_reply->str, cursor_reply->len)); - - // actor ids - redisReply *array_reply = reply->element[1]; - RAY_CHECK(array_reply != nullptr && array_reply->type == REDIS_REPLY_ARRAY); - for (size_t i = 0; i < array_reply->elements; ++i) { - redisReply *id_reply = array_reply->element[i]; - RAY_CHECK(id_reply != nullptr && id_reply->type == REDIS_REPLY_STRING); - auto id_with_prefix = std::string(id_reply->str, id_reply->len); - // The key of actor_checkpoint table and actor_checkpoint_id table have the same - // prefix of `ACTOR`, so we should check the length of the key to filter them. - if (id_with_prefix.size() == table_prefix.size() + ActorID::Size()) { - auto id = ActorID::FromBinary(id_with_prefix.substr(table_prefix.size())); - actor_id_set.emplace(id); - } - } - } while (cursor != 0); - std::vector actor_id_list; - actor_id_list.reserve(actor_id_set.size()); - actor_id_list.insert(actor_id_list.end(), actor_id_set.begin(), actor_id_set.end()); - return actor_id_list; -} - -std::vector LogBasedActorTable::GetAllActorID() { - auto redis_context = client_->primary_context()->sync_context(); - return SyncGetAllActorID(redis_context, TablePrefix_Name(prefix_)); -} - -Status LogBasedActorTable::Get(const ray::ActorID &actor_id, - ray::rpc::ActorTableData *actor_table_data) { - RAY_CHECK(actor_table_data != nullptr); - auto key = TablePrefix_Name(prefix_) + actor_id.Binary(); - auto reply = GetRedisContext(actor_id)->RunArgvSync({"LRANGE", key, "-1", "-1"}); - if (!reply || reply->IsNil()) { - return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex()); - } - - const auto &data_list = reply->ReadAsStringArray(); - if (data_list.empty()) { - return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex()); - } - - RAY_CHECK(data_list.size() == 1); - actor_table_data->ParseFromString(data_list.front()); - return Status::OK(); -} - -std::vector ActorTable::GetAllActorID() { - auto redis_context = client_->primary_context()->sync_context(); - return SyncGetAllActorID(redis_context, TablePrefix_Name(prefix_)); -} - -Status ActorTable::Get(const ray::ActorID &actor_id, - ray::rpc::ActorTableData *actor_table_data) { - RAY_CHECK(actor_table_data != nullptr); - auto key = TablePrefix_Name(prefix_) + actor_id.Binary(); - auto reply = GetRedisContext(actor_id)->RunArgvSync({"GET", key}); - if (!reply || reply->IsNil()) { - return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex()); - } - actor_table_data->ParseFromString(reply->ReadAsString()); - return Status::OK(); -} - -template class Log; -template class Set; -template class Log; -template class Table; -template class Log; -template class Log; -template class Table; -template class Table; -template class Table; -template class Log; -template class Log; -template class Log; -template class Log; -template class Log; -template class Log; -template class Table; -template class Table; - -template class Log; -template class Hash; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/tables.h b/src/ray/gcs/tables.h deleted file mode 100644 index c7c647162..000000000 --- a/src/ray/gcs/tables.h +++ /dev/null @@ -1,978 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "ray/common/constants.h" -#include "ray/common/id.h" -#include "ray/common/status.h" -#include "ray/gcs/callback.h" -#include "ray/gcs/entry_change_notification.h" -#include "ray/gcs/redis_context.h" -#include "ray/util/logging.h" -#include "src/ray/protobuf/gcs.pb.h" - -struct redisAsyncContext; - -namespace ray { - -namespace gcs { - -using rpc::ActorTableData; -using rpc::ErrorTableData; -using rpc::GcsChangeMode; -using rpc::GcsEntry; -using rpc::GcsNodeInfo; -using rpc::HeartbeatTableData; -using rpc::JobTableData; -using rpc::ObjectTableData; -using rpc::ProfileTableData; -using rpc::ResourceTableData; -using rpc::ResourceUsageBatchData; -using rpc::TablePrefix; -using rpc::TablePubsub; -using rpc::TaskLeaseData; -using rpc::TaskReconstructionData; -using rpc::TaskTableData; -using rpc::WorkerTableData; - -class RedisContext; - -class RedisGcsClient; - -/// Specifies whether commands issued to a table should be regular or chain-replicated -/// (when available). -enum class CommandType { kRegular, kChain, kUnknown }; - -/// \class PubsubInterface -/// -/// The interface for a pubsub storage system. The client of a storage system -/// that implements this interface can request and cancel notifications for -/// specific keys. -template -class PubsubInterface { - public: - virtual Status RequestNotifications(const JobID &job_id, const ID &id, - const NodeID &node_id, - const StatusCallback &done) = 0; - virtual Status CancelNotifications(const JobID &job_id, const ID &id, - const NodeID &node_id, - const StatusCallback &done) = 0; - virtual ~PubsubInterface(){}; -}; - -template -class LogInterface { - public: - using WriteCallback = - std::function; - virtual Status Append(const JobID &job_id, const ID &id, - const std::shared_ptr &data, const WriteCallback &done) = 0; - virtual Status AppendAt(const JobID &job_id, const ID &id, - const std::shared_ptr &data, const WriteCallback &done, - const WriteCallback &failure, int log_length) = 0; - virtual ~LogInterface(){}; -}; - -/// \class Log -/// -/// A GCS table where every entry is an append-only log. This class is not -/// meant to be used directly. All log classes should derive from this class -/// and override the prefix_ member with a unique prefix for that log, and the -/// pubsub_channel_ member if pubsub is required. -/// -/// Example tables backed by Log: -/// NodeTable: Stores a log of which GCS clients have been added or deleted -/// from the system. -template -class Log : public LogInterface, virtual public PubsubInterface { - public: - using Callback = std::function &data)>; - - using NotificationCallback = - std::function &data)>; - - /// The callback to call when a write to a key succeeds. - using WriteCallback = typename LogInterface::WriteCallback; - /// The callback to call when a SUBSCRIBE call completes and we are ready to - /// request and receive notifications. - using SubscriptionCallback = std::function; - - struct CallbackData { - ID id; - std::shared_ptr data; - Callback callback; - // An optional callback to call for subscription operations, where the - // first message is a notification of subscription success. - SubscriptionCallback subscription_callback; - Log *log; - RedisGcsClient *client; - }; - - Log(const std::vector> &contexts, RedisGcsClient *client) - : shard_contexts_(contexts), - client_(client), - pubsub_channel_(TablePubsub::NO_PUBLISH), - prefix_(TablePrefix::UNUSED), - subscribe_callback_index_(-1){}; - - /// Append a log entry to a key. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param data Data to append to the log. TODO(rkn): This can be made const, - /// right? - /// \param done Callback that is called once the data has been written to the - /// GCS. - /// \return Status - Status Append(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done); - - /// Append a log entry to a key synchronously. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param data Data to append to the log. - /// \return Status - Status SyncAppend(const JobID &job_id, const ID &id, const std::shared_ptr &data); - - /// Append a log entry to a key if and only if the log has the given number - /// of entries. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param data Data to append to the log. - /// \param done Callback that is called if the data was appended to the log. - /// \param failure Callback that is called if the data was not appended to - /// the log because the log length did not match the given `log_length`. - /// \param log_length The number of entries that the log must have for the - /// append to succeed. - /// \return Status - Status AppendAt(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done, const WriteCallback &failure, - int log_length); - - /// Lookup the log values at a key asynchronously. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is looked up in the GCS. - /// \param lookup Callback that is called after lookup. If the callback is - /// called with an empty vector, then there was no data at the key. - /// \return Status - Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup); - - /// Subscribe to any Append operations to this table. The caller may choose - /// requests notifications for. This may only be called once per Log - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each Add to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe Callback that is called on each received message. If the - /// callback is called with an empty vector, then there was no data at the key. - /// \param done Callback that is called when subscription is complete and we - /// are ready to receive messages. - /// \return Status - Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe, - const SubscriptionCallback &done); - - /// Request notifications about a key in this table. - /// - /// The notifications will be returned via the subscribe callback that was - /// registered by `Subscribe`. An initial notification will be returned for - /// the current values at the key, if any, and a subsequent notification will - /// be published for every following `Append` to the key. Before - /// notifications can be requested, the caller must first call `Subscribe`, - /// with the same `node_id`. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the key to request notifications for. - /// \param node_id The node who is requesting notifications. - /// \param done Callback that is called when request notifications is complete. - /// notifications can be requested, a call to `Subscribe` to this - /// table with the same `node_id` must complete successfully. - /// \return Status - Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &node_id, - const StatusCallback &done); - - /// Cancel notifications about a key in this table. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the key to request notifications for. - /// \param node_id The node who originally requested notifications. - /// \param done Callback that is called when cancel notifications is complete. - /// \return Status - Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &node_id, - const StatusCallback &done); - - /// Subscribe to any modifications to the key. The caller may choose - /// to subscribe to all modifications, or to subscribe only to keys that it - /// requests notifications for. This may only be called once per Log - /// instance. This function is different from public version due to - /// an additional parameter change_mode in NotificationCallback. Therefore this - /// function supports notifications of remove operations. - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each Add to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe Callback that is called on each received message. If the - /// callback is called with an empty vector, then there was no data at the key. - /// \param done Callback that is called when subscription is complete and we - /// are ready to receive messages. - /// \return Status - Status Subscribe(const JobID &job_id, const NodeID &node_id, - const NotificationCallback &subscribe, - const SubscriptionCallback &done); - - /// Delete an entire key from redis. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data to delete from the GCS. - /// \return Void. - void Delete(const JobID &job_id, const ID &id); - - /// Delete several keys from redis. - /// - /// \param job_id The ID of the job. - /// \param ids The vector of IDs to delete from the GCS. - /// \return Void. - void Delete(const JobID &job_id, const std::vector &ids); - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; - - protected: - std::shared_ptr GetRedisContext(const ID &id) { - static std::hash index; - return shard_contexts_[index(id) % shard_contexts_.size()]; - } - - /// The connection to the GCS. - std::vector> shard_contexts_; - /// The GCS client. - RedisGcsClient *client_; - /// The pubsub channel to subscribe to for notifications about keys in this - /// table. If no notifications are required, this should be set to - /// TablePubsub_NO_PUBLISH. If notifications are required, then this must be - /// unique across all instances of Log. - TablePubsub pubsub_channel_; - /// The prefix to use for keys in this table. This must be unique across all - /// instances of Log. - TablePrefix prefix_; - /// The index in the RedisCallbackManager for the callback that is called - /// when we receive notifications. This is >= 0 iff we have subscribed to the - /// table, otherwise -1. - int64_t subscribe_callback_index_; - - /// Commands to a GCS table can either be regular (default) or chain-replicated. - CommandType command_type_ = CommandType::kRegular; - - int64_t num_appends_ = 0; - int64_t num_lookups_ = 0; -}; - -template -class TableInterface { - public: - using WriteCallback = typename Log::WriteCallback; - virtual Status Add(const JobID &job_id, const ID &task_id, - const std::shared_ptr &data, const WriteCallback &done) = 0; - virtual ~TableInterface(){}; -}; - -/// \class Table -/// -/// A GCS table where every entry is a single data item. This class is not -/// meant to be used directly. All table classes should derive from this class -/// and override the prefix_ member with a unique prefix for that table, and -/// the pubsub_channel_ member if pubsub is required. -/// -/// Example tables backed by Log: -/// TaskTable: Stores Task metadata needed for executing the task. -template -class Table : private Log, - public TableInterface, - virtual public PubsubInterface { - public: - using Callback = - std::function; - using WriteCallback = typename Log::WriteCallback; - /// The callback to call when a Lookup call returns an empty entry. - using FailureCallback = std::function; - /// The callback to call when a Subscribe call completes and we are ready to - /// request and receive notifications. - using SubscriptionCallback = typename Log::SubscriptionCallback; - - Table(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) {} - - using Log::RequestNotifications; - using Log::CancelNotifications; - /// Expose this interface for use by subscription tools class SubscriptionExecutor. - /// In this way TaskTable() can also reuse class SubscriptionExecutor. - using Log::Subscribe; - - /// Add an entry to the table. This overwrites any existing data at the key. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param data Data that is added to the GCS. - /// \param done Callback that is called once the data has been written to the - /// GCS. - /// \return Status - Status Add(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done); - - /// Lookup an entry asynchronously. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is looked up in the GCS. - /// \param lookup Callback that is called after lookup if there was data the - /// key. - /// \param failure Callback that is called after lookup if there was no data - /// at the key. - /// \return Status - Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup, - const FailureCallback &failure); - - /// Subscribe to any Add operations to this table. The caller may choose to - /// subscribe to all Adds, or to subscribe only to keys that it requests - /// notifications for. This may only be called once per Table instance. - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each Add to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe Callback that is called on each received message. If the - /// callback is called with an empty vector, then there was no data at the key. - /// \param failure Callback that is called if the key is empty at the time - /// that notifications are requested. - /// \param done Callback that is called when subscription is complete and we - /// are ready to receive messages. - /// \return Status - Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe, - const FailureCallback &failure, const SubscriptionCallback &done); - - /// Subscribe to any Add operations to this table. The caller may choose to - /// subscribe to all Adds, or to subscribe only to keys that it requests - /// notifications for. This may only be called once per Table instance. - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each Add to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe Callback that is called on each received message. If the - /// callback is called with an empty vector, then there was no data at the key. - /// \param done Callback that is called when subscription is complete and we - /// are ready to receive messages. - /// \return Status - Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe, - const SubscriptionCallback &done); - - void Delete(const JobID &job_id, const ID &id) { Log::Delete(job_id, id); } - - void Delete(const JobID &job_id, const std::vector &ids) { - Log::Delete(job_id, ids); - } - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; - - protected: - using Log::shard_contexts_; - using Log::client_; - using Log::pubsub_channel_; - using Log::prefix_; - using Log::command_type_; - using Log::GetRedisContext; - - int64_t num_adds_ = 0; - int64_t num_lookups_ = 0; -}; - -template -class SetInterface { - public: - using WriteCallback = typename Log::WriteCallback; - virtual Status Add(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done) = 0; - virtual Status Remove(const JobID &job_id, const ID &id, - const std::shared_ptr &data, const WriteCallback &done) = 0; - virtual ~SetInterface(){}; -}; - -/// \class Set -/// -/// A GCS table where every entry is an addable & removable set. This class is not -/// meant to be used directly. All set classes should derive from this class -/// and override the prefix_ member with a unique prefix for that set, and the -/// pubsub_channel_ member if pubsub is required. -/// -/// Example tables backed by Set: -/// ObjectTable: Stores a set of which clients have added an object. -template -class Set : private Log, - public SetInterface, - virtual public PubsubInterface { - public: - using Callback = typename Log::Callback; - using WriteCallback = typename Log::WriteCallback; - using SubscriptionCallback = typename Log::SubscriptionCallback; - - Set(const std::vector> &contexts, RedisGcsClient *client) - : Log(contexts, client) {} - - using Log::RequestNotifications; - using Log::CancelNotifications; - using Log::Lookup; - using Log::Delete; - - /// Add an entry to the set. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param data Data to add to the set. - /// \param done Callback that is called once the data has been written to the - /// GCS. - /// \return Status - Status Add(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done); - - /// Remove an entry from the set. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is removed from the GCS. - /// \param data Data to remove from the set. - /// \param done Callback that is called once the data has been written to the - /// GCS. - /// \return Status - Status Remove(const JobID &job_id, const ID &id, const std::shared_ptr &data, - const WriteCallback &done); - - using NotificationCallback = - std::function> &data)>; - /// Subscribe to any add or remove operations to this table. - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each add or remove to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe Callback that is called on each received message. - /// \param done Callback that is called when subscription is complete and we - /// are ready to receive messages. - /// \return Status - Status Subscribe(const JobID &job_id, const NodeID &node_id, - const NotificationCallback &subscribe, - const SubscriptionCallback &done); - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; - - protected: - using Log::shard_contexts_; - using Log::client_; - using Log::pubsub_channel_; - using Log::prefix_; - using Log::GetRedisContext; - - int64_t num_adds_ = 0; - int64_t num_removes_ = 0; - using Log::num_lookups_; -}; - -template -class HashInterface { - public: - using DataMap = std::unordered_map>; - // Reuse Log's SubscriptionCallback when Subscribe is successfully called. - using SubscriptionCallback = typename Log::SubscriptionCallback; - - /// The callback function used by function Update & Lookup. - /// - /// \param client The client on which the RemoveEntries is called. - /// \param id The ID of the Hash Table whose entries are removed. - /// \param data Map data contains the change to the Hash Table. - /// \return Void - using HashCallback = - std::function; - - /// The callback function used by function RemoveEntries. - /// - /// \param client The client on which the RemoveEntries is called. - /// \param id The ID of the Hash Table whose entries are removed. - /// \param keys The keys that are moved from this Hash Table. - /// \return Void - using HashRemoveCallback = std::function &keys)>; - - /// The notification function used by function Subscribe. - /// - /// \param client The client on which the Subscribe is called. - /// \param change_mode The mode to identify the data is removed or updated. - /// \param data Map data contains the change to the Hash Table. - /// \return Void - using HashNotificationCallback = - std::function> &data)>; - - /// Add entries of a hash table. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is added to the GCS. - /// \param pairs Map data to add to the hash table. - /// \param done HashCallback that is called once the request data has been written to - /// the GCS. - /// \return Status - virtual Status Update(const JobID &job_id, const ID &id, const DataMap &pairs, - const HashCallback &done) = 0; - - /// Remove entries from the hash table. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is removed from the GCS. - /// \param keys The entry keys of the hash table. - /// \param remove_callback HashRemoveCallback that is called once the data has been - /// written to the GCS no matter whether the key exists in the hash table. - /// \return Status - virtual Status RemoveEntries(const JobID &job_id, const ID &id, - const std::vector &keys, - const HashRemoveCallback &remove_callback) = 0; - - /// Lookup the map data of a hash table. - /// - /// \param job_id The ID of the job. - /// \param id The ID of the data that is looked up in the GCS. - /// \param lookup HashCallback that is called after lookup. If the callback is - /// called with an empty hash table, then there was no data in the callback. - /// \return Status - virtual Status Lookup(const JobID &job_id, const ID &id, - const HashCallback &lookup) = 0; - - /// Subscribe to any Update or Remove operations to this hash table. - /// - /// \param job_id The ID of the job. - /// \param node_id The type of update to listen to. If this is nil, then a - /// message for each Update to the table will be received. Else, only - /// messages for the given node will be received. In the latter - /// case, the node may request notifications on specific keys in the - /// table via `RequestNotifications`. - /// \param subscribe HashNotificationCallback that is called on each received message. - /// \param done SubscriptionCallback that is called when subscription is complete and - /// we are ready to receive messages. - /// \return Status - virtual Status Subscribe(const JobID &job_id, const NodeID &node_id, - const HashNotificationCallback &subscribe, - const SubscriptionCallback &done) = 0; - - virtual ~HashInterface(){}; -}; - -template -class Hash : private Log, - public HashInterface, - virtual public PubsubInterface { - public: - using DataMap = std::unordered_map>; - using HashCallback = typename HashInterface::HashCallback; - using HashRemoveCallback = typename HashInterface::HashRemoveCallback; - using HashNotificationCallback = - typename HashInterface::HashNotificationCallback; - using SubscriptionCallback = typename Log::SubscriptionCallback; - - Hash(const std::vector> &contexts, RedisGcsClient *client) - : Log(contexts, client) {} - - using Log::RequestNotifications; - using Log::CancelNotifications; - - Status Update(const JobID &job_id, const ID &id, const DataMap &pairs, - const HashCallback &done) override; - - Status Subscribe(const JobID &job_id, const NodeID &node_id, - const HashNotificationCallback &subscribe, - const SubscriptionCallback &done) override; - - Status Lookup(const JobID &job_id, const ID &id, const HashCallback &lookup) override; - - Status RemoveEntries(const JobID &job_id, const ID &id, - const std::vector &keys, - const HashRemoveCallback &remove_callback) override; - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; - - protected: - using Log::shard_contexts_; - using Log::client_; - using Log::pubsub_channel_; - using Log::prefix_; - using Log::subscribe_callback_index_; - using Log::GetRedisContext; - - int64_t num_adds_ = 0; - int64_t num_removes_ = 0; - using Log::num_lookups_; -}; - -class DynamicResourceTable : public Hash { - public: - DynamicResourceTable(const std::vector> &contexts, - RedisGcsClient *client) - : Hash(contexts, client) { - pubsub_channel_ = TablePubsub::NODE_RESOURCE_PUBSUB; - prefix_ = TablePrefix::NODE_RESOURCE; - }; - - virtual ~DynamicResourceTable(){}; -}; - -class ObjectTable : public Set { - public: - ObjectTable(const std::vector> &contexts, - RedisGcsClient *client) - : Set(contexts, client) { - pubsub_channel_ = TablePubsub::OBJECT_PUBSUB; - prefix_ = TablePrefix::OBJECT; - }; - - virtual ~ObjectTable(){}; -}; - -class HeartbeatTable : public Table { - public: - HeartbeatTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::HEARTBEAT_PUBSUB; - prefix_ = TablePrefix::HEARTBEAT; - } - virtual ~HeartbeatTable() {} -}; - -class ResourceUsageBatchTable : public Table { - public: - ResourceUsageBatchTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::RESOURCE_USAGE_BATCH_PUBSUB; - prefix_ = TablePrefix::RESOURCE_USAGE_BATCH; - } - virtual ~ResourceUsageBatchTable() {} -}; - -class JobTable : public Log { - public: - JobTable(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) { - pubsub_channel_ = TablePubsub::JOB_PUBSUB; - prefix_ = TablePrefix::JOB; - }; - - virtual ~JobTable() {} -}; - -/// Log-based Actor table starts with an ALIVE entry, which represents the first time the -/// actor is created. This may be followed by 0 or more pairs of RESTARTING, ALIVE -/// entries, which represent each time the actor fails (RESTARTING) and gets recreated -/// (ALIVE). These may be followed by a DEAD entry, which means that the actor has failed -/// and will not be reconstructed. -class LogBasedActorTable : public Log { - public: - LogBasedActorTable(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) { - pubsub_channel_ = TablePubsub::ACTOR_PUBSUB; - prefix_ = TablePrefix::ACTOR; - } - - /// Get all actor id synchronously. - std::vector GetAllActorID(); - - /// Get actor table data by actor id synchronously. - Status Get(const ActorID &actor_id, ActorTableData *actor_table_data); -}; - -/// Actor table. -/// This table is only used for GCS-based actor management. And when completely migrate to -/// GCS service, the log-based actor table could be removed. -class ActorTable : public Table { - public: - ActorTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::ACTOR_PUBSUB; - prefix_ = TablePrefix::ACTOR; - } - - /// Get all actor id synchronously. - std::vector GetAllActorID(); - - /// Get actor table data by actor id synchronously. - Status Get(const ActorID &actor_id, ActorTableData *actor_table_data); -}; - -class WorkerTable : public Table { - public: - WorkerTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::WORKER_FAILURE_PUBSUB; - prefix_ = TablePrefix::WORKERS; - } - virtual ~WorkerTable() {} -}; - -class TaskReconstructionLog : public Log { - public: - TaskReconstructionLog(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) { - prefix_ = TablePrefix::TASK_RECONSTRUCTION; - } -}; - -class TaskLeaseTable : public Table { - public: - /// Use boost::optional to represent subscription results, so that we can - /// notify raylet whether the entry of task lease is empty. - using Callback = - std::function> &data)>; - - TaskLeaseTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::TASK_LEASE_PUBSUB; - prefix_ = TablePrefix::TASK_LEASE; - } - - Status Add(const JobID &job_id, const TaskID &id, - const std::shared_ptr &data, - const WriteCallback &done) override { - RAY_RETURN_NOT_OK((Table::Add(job_id, id, data, done))); - // Mark the entry for expiration in Redis. It's okay if this command fails - // since the lease entry itself contains the expiration period. In the - // worst case, if the command fails, then a client that looks up the lease - // entry will overestimate the expiration time. - // TODO(swang): Use a common helper function to format the key instead of - // hardcoding it to match the Redis module. - std::vector args = {"PEXPIRE", TablePrefix_Name(prefix_) + id.Binary(), - std::to_string(data->timeout())}; - - return GetRedisContext(id)->RunArgvAsync(args); - } - - /// Implement this method for the subscription tools class SubscriptionExecutor. - /// In this way TaskLeaseTable() can also reuse class SubscriptionExecutor. - Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe, - const SubscriptionCallback &done); -}; - -namespace raylet { - -class TaskTable : public Table { - public: - TaskTable(const std::vector> &contexts, - RedisGcsClient *client) - : Table(contexts, client) { - pubsub_channel_ = TablePubsub::RAYLET_TASK_PUBSUB; - prefix_ = TablePrefix::RAYLET_TASK; - } - - TaskTable(const std::vector> &contexts, - RedisGcsClient *client, gcs::CommandType command_type) - : TaskTable(contexts, client) { - command_type_ = command_type; - }; -}; - -} // namespace raylet - -class ProfileTable : public Log { - public: - ProfileTable(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) { - prefix_ = TablePrefix::PROFILE; - }; - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; -}; - -/// \class NodeTable -/// -/// The NodeTable stores information about active and inactive nodes. It is -/// structured as a single log stored at a key known to all nodes. When a -/// node connects, it appends an entry to the log indicating that it is -/// alive. When a node disconnects, or if another node detects its failure, -/// it should append an entry to the log indicating that it is dead. A node -/// that is marked as dead should never again be marked as alive; if it needs -/// to reconnect, it must connect with a different NodeID. -class NodeTable : public Log { - public: - NodeTable(const std::vector> &contexts, - RedisGcsClient *client) - : Log(contexts, client) { - pubsub_channel_ = TablePubsub::NODE_PUBSUB; - prefix_ = TablePrefix::NODE; - }; - - /// Connect as a NODE to the GCS. This registers us in the NODE table - /// and begins subscription to NODE table notifications. - /// - /// \param local_node_info Information about the connecting NODE. This must have the - /// same id as the one set in the NODE table. - /// \return Status - ray::Status Connect(const GcsNodeInfo &local_node_info); - - /// Disconnect the NODE from the GCS. The NODE ID assigned during - /// registration should never be reused after disconnecting. - /// - /// \return Status - ray::Status Disconnect(); - - /// Mark a new node as connected to GCS asynchronously. - /// - /// \param node_info Information about the node. - /// \param done Callback that is called once the node has been marked to connected. - /// \return Status - ray::Status MarkConnected(const GcsNodeInfo &node_info, const WriteCallback &done); - - /// Mark a different node as disconnected. The NODE ID should never be - /// reused for a new node. - /// - /// \param dead_node_id The ID of the node to mark as dead. - /// \param done Callback that is called once the node has been marked to - /// disconnected. - /// \return Status - ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done); - - ray::Status SubscribeToNodeChange( - const SubscribeCallback &subscribe, - const StatusCallback &done); - - /// Get a node's information from the cache. The cache only contains - /// information for nodes that we've heard a notification for. - /// - /// \param node The node to get information about. - /// \param node_info The node information will be copied here if - /// we have the node in the cache. - /// a nil node ID. - /// \return Whether the node is in the cache. - bool GetNode(const NodeID &node, GcsNodeInfo *node_info) const; - - /// Get the local node's ID. - /// - /// \return The local node's ID. - const NodeID &GetLocalNodeId() const; - - /// Get the local node's information. - /// - /// \return The local node's information. - const GcsNodeInfo &GetLocalNode() const; - - /// Check whether the given node is removed. - /// - /// \param node_id The ID of the node to check. - /// \return Whether the node with specified ID is removed. - bool IsRemoved(const NodeID &node_id) const; - - /// Get the information of all nodes. - /// - /// \return The node ID to node information map. - const std::unordered_map &GetAllNodes() const; - - /// Lookup the node data in the node table. - /// - /// \param lookup Callback that is called after lookup. If the callback is - /// called with an empty vector, then there was no data at the key. - /// \return Status. - Status Lookup(const Callback &lookup); - - /// Returns debug string for class. - /// - /// \return string. - std::string DebugString() const; - - /// The key at which the log of node information is stored. This key must - /// be kept the same across all instances of the NodeTable, so that all - /// nodes append and read from the same key. - NodeID node_log_key_; - - private: - using NodeChangeCallback = - std::function; - - /// Register a callback to call when a new node is added or a node is removed. - /// - /// \param callback The callback to register. - void RegisterNodeChangeCallback(const NodeChangeCallback &callback); - - /// Handle a node table notification. - void HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info); - - /// Whether this node has called Disconnect(). - bool disconnected_{false}; - /// This node's ID. It will be initialized when we call method `Connect(...)`. - NodeID local_node_id_; - /// Information about this node. - GcsNodeInfo local_node_info_; - /// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and - /// RequestNotification. - /// The reason for not using `local_node_id_` is because it is only initialized - /// for registered nodes. - NodeID subscribe_id_{NodeID::FromRandom()}; - /// The callback to call when a new node is added or a node is removed. - NodeChangeCallback node_change_callback_{nullptr}; - /// A cache for information about all nodes. - std::unordered_map node_cache_; - /// The set of removed nodes. - std::unordered_set removed_nodes_; -}; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/test/accessor_test_base.h b/src/ray/gcs/test/accessor_test_base.h deleted file mode 100644 index 7ce8d0bfa..000000000 --- a/src/ray/gcs/test/accessor_test_base.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "ray/common/test_util.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_gcs_client.h" - -namespace ray { - -namespace gcs { - -template -class AccessorTestBase : public ::testing::Test { - public: - AccessorTestBase() { TestSetupUtil::StartUpRedisServers(std::vector()); } - - virtual ~AccessorTestBase() { TestSetupUtil::ShutDownRedisServers(); } - - virtual void SetUp() { - GenTestData(); - - GcsClientOptions options = - GcsClientOptions("127.0.0.1", TEST_REDIS_SERVER_PORTS.front(), "", true); - gcs_client_.reset(new RedisGcsClient(options)); - RAY_CHECK_OK(gcs_client_->Connect(io_service_)); - - work_thread_.reset(new std::thread([this] { - std::unique_ptr work( - new boost::asio::io_service::work(io_service_)); - io_service_.run(); - })); - } - - virtual void TearDown() { - gcs_client_->Disconnect(); - - io_service_.stop(); - work_thread_->join(); - work_thread_.reset(); - - gcs_client_.reset(); - - ClearTestData(); - } - - protected: - virtual void GenTestData() = 0; - - void ClearTestData() { id_to_data_.clear(); } - - void WaitPendingDone(std::chrono::milliseconds timeout) { - WaitPendingDone(pending_count_, timeout); - } - - void WaitPendingDone(std::atomic &pending_count, - std::chrono::milliseconds timeout) { - auto condition = [&pending_count]() { return pending_count == 0; }; - EXPECT_TRUE(WaitForCondition(condition, timeout.count())); - } - - protected: - std::unique_ptr gcs_client_; - - boost::asio::io_service io_service_; - std::unique_ptr work_thread_; - - std::unordered_map> id_to_data_; - - std::atomic pending_count_{0}; - std::chrono::milliseconds wait_pending_timeout_{10000}; -}; - -} // namespace gcs - -} // namespace ray diff --git a/src/ray/gcs/test/redis_actor_info_accessor_test.cc b/src/ray/gcs/test/redis_actor_info_accessor_test.cc deleted file mode 100644 index 49f474621..000000000 --- a/src/ray/gcs/test/redis_actor_info_accessor_test.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "ray/common/test_util.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/gcs/test/accessor_test_base.h" - -namespace ray { - -namespace gcs { - -class ActorInfoAccessorTest : public AccessorTestBase { - protected: - virtual void GenTestData() { - for (size_t i = 0; i < 100; ++i) { - std::shared_ptr actor = std::make_shared(); - actor->set_max_restarts(1); - actor->set_num_restarts(0); - JobID job_id = JobID::FromInt(i); - actor->set_job_id(job_id.Binary()); - actor->set_state(ActorTableData::ALIVE); - ActorID actor_id = ActorID::Of(job_id, RandomTaskId(), /*parent_task_counter=*/i); - actor->set_actor_id(actor_id.Binary()); - id_to_data_[actor_id] = actor; - } - } - - size_t checkpoint_number_{2}; -}; - -TEST_F(ActorInfoAccessorTest, Subscribe) { - ActorInfoAccessor &actor_accessor = gcs_client_->Actors(); - // subscribe - std::atomic sub_pending_count(0); - std::atomic do_sub_pending_count(0); - auto subscribe = [this, &sub_pending_count](const ActorID &actor_id, - const ActorTableData &data) { - const auto it = id_to_data_.find(actor_id); - ASSERT_TRUE(it != id_to_data_.end()); - --sub_pending_count; - }; - auto done = [&do_sub_pending_count](Status status) { - RAY_CHECK_OK(status); - --do_sub_pending_count; - }; - - ++do_sub_pending_count; - RAY_CHECK_OK(actor_accessor.AsyncSubscribeAll(subscribe, done)); - // Wait until subscribe finishes. - WaitPendingDone(do_sub_pending_count, wait_pending_timeout_); -} - -} // namespace gcs - -} // namespace ray - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - RAY_CHECK(argc == 4); - ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1]; - ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2]; - ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3]; - return RUN_ALL_TESTS(); -} diff --git a/src/ray/gcs/test/redis_gcs_client_test.cc b/src/ray/gcs/test/redis_gcs_client_test.cc deleted file mode 100644 index 771d6a703..000000000 --- a/src/ray/gcs/test/redis_gcs_client_test.cc +++ /dev/null @@ -1,1505 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "ray/gcs/redis_gcs_client.h" - -#include "gtest/gtest.h" -#include "ray/common/ray_config.h" -#include "ray/common/test_util.h" -#include "ray/gcs/pb_util.h" -#include "ray/gcs/tables.h" - -extern "C" { -#include "hiredis/hiredis.h" -} - -namespace ray { - -namespace gcs { - -/* Flush redis. */ -static inline void flushall_redis(void) { - redisContext *context = redisConnect("127.0.0.1", TEST_REDIS_SERVER_PORTS.front()); - freeReplyObject(redisCommand(context, "FLUSHALL")); - redisFree(context); -} - -/// A helper function to generate an unique JobID. -inline JobID NextJobID() { - static int32_t counter = 0; - return JobID::FromInt(++counter); -} - -class TestGcs : public ::testing::Test { - public: - TestGcs(CommandType command_type) : num_callbacks_(0), command_type_(command_type) { - TestSetupUtil::StartUpRedisServers(std::vector()); - job_id_ = NextJobID(); - } - - virtual ~TestGcs() { - // Clear all keys in the GCS. - flushall_redis(); - TestSetupUtil::ShutDownRedisServers(); - }; - - virtual void Start() = 0; - - virtual void Stop() = 0; - - uint64_t NumCallbacks() const { return num_callbacks_; } - - void IncrementNumCallbacks() { num_callbacks_++; } - - protected: - uint64_t num_callbacks_; - gcs::CommandType command_type_; - std::shared_ptr client_; - JobID job_id_; -}; - -TestGcs *test; -NodeID local_node_id = NodeID::FromRandom(); - -class TestGcsWithAsio : public TestGcs { - public: - TestGcsWithAsio(CommandType command_type) - : TestGcs(command_type), io_service_(), work_(io_service_) {} - - TestGcsWithAsio() : TestGcsWithAsio(CommandType::kRegular) {} - - ~TestGcsWithAsio() { - // Destroy the client first since it has a reference to the event loop. - client_->Disconnect(); - client_.reset(); - } - - void SetUp() override { - GcsClientOptions options("127.0.0.1", TEST_REDIS_SERVER_PORTS.front(), "", true); - client_ = std::make_shared(options, command_type_); - RAY_CHECK_OK(client_->Connect(io_service_)); - } - - void Start() override { io_service_.run(); } - void Stop() override { io_service_.stop(); } - - private: - boost::asio::io_service io_service_; - // Give the event loop some work so that it's forced to run until Stop() is - // called. - boost::asio::io_service::work work_; -}; - -class TestGcsWithChainAsio : public TestGcsWithAsio { - public: - TestGcsWithChainAsio() : TestGcsWithAsio(gcs::CommandType::kChain){}; -}; - -class TaskTableTestHelper { - public: - /// A helper function that creates a GCS `TaskTableData` object. - static std::shared_ptr CreateTaskTableData(const TaskID &task_id, - uint64_t num_returns = 0) { - auto data = std::make_shared(); - data->mutable_task()->mutable_task_spec()->set_task_id(task_id.Binary()); - data->mutable_task()->mutable_task_spec()->set_num_returns(num_returns); - return data; - } - - /// A helper function that compare whether 2 `TaskTableData` objects are equal. - /// Note, this function only compares fields set by `CreateTaskTableData`. - static bool TaskTableDataEqual(const TaskTableData &data1, const TaskTableData &data2) { - const auto &spec1 = data1.task().task_spec(); - const auto &spec2 = data2.task().task_spec(); - return (spec1.task_id() == spec2.task_id() && - spec1.num_returns() == spec2.num_returns()); - } - - static void TestTableLookup(const JobID &job_id, - std::shared_ptr client) { - const auto task_id = RandomTaskId(); - const auto data = CreateTaskTableData(task_id); - - // Check that we added the correct task. - auto add_callback = [task_id, data](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &d) { - ASSERT_EQ(id, task_id); - ASSERT_TRUE(TaskTableDataEqual(*data, d)); - }; - - // Check that the lookup returns the added task. - auto lookup_callback = [task_id, data](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &d) { - ASSERT_EQ(id, task_id); - ASSERT_TRUE(TaskTableDataEqual(*data, d)); - test->Stop(); - }; - - // Check that the lookup does not return an empty entry. - auto failure_callback = [](gcs::RedisGcsClient *client, const TaskID &id) { - RAY_CHECK(false); - }; - - // Add the task, then do a lookup. - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id, data, add_callback)); - RAY_CHECK_OK(client->raylet_task_table().Lookup(job_id, task_id, lookup_callback, - failure_callback)); - // Run the event loop. The loop will only stop if the Lookup callback is - // called (or an assertion failure). - test->Start(); - } - - static void TestTableLookupFailure(const JobID &job_id, - std::shared_ptr client) { - TaskID task_id = RandomTaskId(); - - // Check that the lookup does not return data. - auto lookup_callback = [](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &d) { RAY_CHECK(false); }; - - // Check that the lookup returns an empty entry. - auto failure_callback = [task_id](gcs::RedisGcsClient *client, const TaskID &id) { - ASSERT_EQ(id, task_id); - test->Stop(); - }; - - // Lookup the task. We have not done any writes, so the key should be empty. - RAY_CHECK_OK(client->raylet_task_table().Lookup(job_id, task_id, lookup_callback, - failure_callback)); - // Run the event loop. The loop will only stop if the failure callback is - // called (or an assertion failure). - test->Start(); - } - - static void TestDeleteKeysFromTable( - const JobID &job_id, std::shared_ptr client, - std::vector> &data_vector, bool stop_at_end) { - std::vector ids; - TaskID task_id; - for (auto &data : data_vector) { - task_id = RandomTaskId(); - ids.push_back(task_id); - // Check that we added the correct object entries. - auto add_callback = [task_id, data](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &d) { - ASSERT_EQ(id, task_id); - ASSERT_TRUE(TaskTableDataEqual(*data, d)); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id, data, add_callback)); - } - for (const auto &task_id : ids) { - auto task_lookup_callback = [task_id](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &data) { - ASSERT_EQ(id, task_id); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->raylet_task_table().Lookup(job_id, task_id, - task_lookup_callback, nullptr)); - } - if (ids.size() == 1) { - client->raylet_task_table().Delete(job_id, ids[0]); - } else { - client->raylet_task_table().Delete(job_id, ids); - } - auto expected_failure_callback = [](RedisGcsClient *client, const TaskID &id) { - ASSERT_TRUE(true); - test->IncrementNumCallbacks(); - }; - auto undesired_callback = [](gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &data) { ASSERT_TRUE(false); }; - for (size_t i = 0; i < ids.size(); ++i) { - RAY_CHECK_OK(client->raylet_task_table().Lookup(job_id, task_id, undesired_callback, - expected_failure_callback)); - } - if (stop_at_end) { - auto stop_callback = [](RedisGcsClient *client, const TaskID &id) { test->Stop(); }; - RAY_CHECK_OK( - client->raylet_task_table().Lookup(job_id, ids[0], nullptr, stop_callback)); - } - } - - static void TestTableSubscribeId(const JobID &job_id, - std::shared_ptr client) { - size_t num_modifications = 3; - - // Add a table entry. - TaskID task_id1 = RandomTaskId(); - - // Add a table entry at a second key. - TaskID task_id2 = RandomTaskId(); - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto notification_callback = [task_id2, num_modifications]( - gcs::RedisGcsClient *client, const TaskID &id, - const TaskTableData &data) { - // Check that we only get notifications for the requested key. - ASSERT_EQ(id, task_id2); - // Check that we get notifications in the same order as the writes. - ASSERT_TRUE( - TaskTableDataEqual(data, *CreateTaskTableData(task_id2, test->NumCallbacks()))); - test->IncrementNumCallbacks(); - if (test->NumCallbacks() == num_modifications) { - test->Stop(); - } - }; - - // The failure callback should be called once since both keys start as empty. - bool failure_notification_received = false; - auto failure_callback = [task_id2, &failure_notification_received]( - gcs::RedisGcsClient *client, const TaskID &id) { - ASSERT_EQ(id, task_id2); - // The failure notification should be the first notification received. - ASSERT_EQ(test->NumCallbacks(), 0); - failure_notification_received = true; - }; - - // The callback for subscription success. Once we've subscribed, request - // notifications for only one of the keys, then write to both keys. - auto subscribe_callback = [job_id, task_id1, task_id2, - num_modifications](gcs::RedisGcsClient *client) { - // Request notifications for one of the keys. - RAY_CHECK_OK(client->raylet_task_table().RequestNotifications( - job_id, task_id2, local_node_id, nullptr)); - // Write both keys. We should only receive notifications for the key that - // we requested them for. - for (uint64_t i = 0; i < num_modifications; i++) { - auto data = CreateTaskTableData(task_id1, i); - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id1, data, nullptr)); - } - for (uint64_t i = 0; i < num_modifications; i++) { - auto data = CreateTaskTableData(task_id2, i); - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id2, data, nullptr)); - } - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->raylet_task_table().Subscribe( - job_id, local_node_id, notification_callback, failure_callback, - subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that the failure callback was called since the key was initially - // empty. - ASSERT_TRUE(failure_notification_received); - // Check that we received one notification callback for each write to the - // requested key. - ASSERT_EQ(test->NumCallbacks(), num_modifications); - } - - static void TestTableSubscribeCancel(const JobID &job_id, - std::shared_ptr client) { - // Add a table entry. - const auto task_id = RandomTaskId(); - uint64_t num_modifications = 3; - const auto data = CreateTaskTableData(task_id, 0); - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id, data, nullptr)); - - // The failure callback should not be called since all keys are non-empty - // when notifications are requested. - auto failure_callback = [](gcs::RedisGcsClient *client, const TaskID &id) { - RAY_CHECK(false); - }; - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto notification_callback = [task_id, num_modifications](gcs::RedisGcsClient *client, - const TaskID &id, - const TaskTableData &data) { - ASSERT_EQ(id, task_id); - // Check that we only get notifications for the first and last writes, - // since notifications are canceled in between. - if (test->NumCallbacks() == 0) { - ASSERT_TRUE(TaskTableDataEqual(data, *CreateTaskTableData(task_id, 0))); - } else { - ASSERT_TRUE(TaskTableDataEqual( - data, *CreateTaskTableData(task_id, num_modifications - 1))); - } - test->IncrementNumCallbacks(); - if (test->NumCallbacks() == num_modifications - 1) { - test->Stop(); - } - }; - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto subscribe_callback = [job_id, task_id, - num_modifications](gcs::RedisGcsClient *client) { - // Request notifications, then cancel immediately. We should receive a - // notification for the current value at the key. - RAY_CHECK_OK(client->raylet_task_table().RequestNotifications( - job_id, task_id, local_node_id, nullptr)); - RAY_CHECK_OK(client->raylet_task_table().CancelNotifications( - job_id, task_id, local_node_id, nullptr)); - // Write to the key. Since we canceled notifications, we should not receive - // a notification for these writes. - for (uint64_t i = 1; i < num_modifications; i++) { - auto data = CreateTaskTableData(task_id, i); - RAY_CHECK_OK(client->raylet_task_table().Add(job_id, task_id, data, nullptr)); - } - // Request notifications again. We should receive a notification for the - // current value at the key. - RAY_CHECK_OK(client->raylet_task_table().RequestNotifications( - job_id, task_id, local_node_id, nullptr)); - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->raylet_task_table().Subscribe( - job_id, local_node_id, notification_callback, failure_callback, - subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that we received a notification callback for the first and least - // writes to the key, since notifications are canceled in between. - ASSERT_EQ(test->NumCallbacks(), 2); - } -}; - -// Convenient macro to test across {ae, asio} x {regular, chain} x {the tests}. -// Undefined at the end. -#define TEST_TASK_TABLE_MACRO(FIXTURE, TEST) \ - TEST_F(FIXTURE, TEST) { \ - test = this; \ - TaskTableTestHelper::TEST(job_id_, client_); \ - } - -TEST_TASK_TABLE_MACRO(TestGcsWithAsio, TestTableLookup); - -class LogLookupTestHelper { - public: - static void TestLogLookup(const JobID &job_id, - std::shared_ptr client) { - // Append some entries to the log at an object ID. - TaskID task_id = RandomTaskId(); - std::vector node_manager_ids = {"abc", "def", "ghi"}; - for (auto &node_manager_id : node_manager_ids) { - auto data = std::make_shared(); - data->set_node_manager_id(node_manager_id); - // Check that we added the correct object entries. - auto add_callback = [task_id, data](gcs::RedisGcsClient *client, const TaskID &id, - const TaskReconstructionData &d) { - ASSERT_EQ(id, task_id); - ASSERT_EQ(data->node_manager_id(), d.node_manager_id()); - }; - RAY_CHECK_OK( - client->task_reconstruction_log().Append(job_id, task_id, data, add_callback)); - } - - // Check that lookup returns the added object entries. - auto lookup_callback = [task_id, node_manager_ids]( - gcs::RedisGcsClient *client, const TaskID &id, - const std::vector &data) { - ASSERT_EQ(id, task_id); - for (const auto &entry : data) { - ASSERT_EQ(entry.node_manager_id(), node_manager_ids[test->NumCallbacks()]); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == node_manager_ids.size()) { - test->Stop(); - } - }; - - // Do a lookup at the object ID. - RAY_CHECK_OK( - client->task_reconstruction_log().Lookup(job_id, task_id, lookup_callback)); - // Run the event loop. The loop will only stop if the Lookup callback is - // called (or an assertion failure). - test->Start(); - ASSERT_EQ(test->NumCallbacks(), node_manager_ids.size()); - } - - static void TestLogAppendAt(const JobID &job_id, - std::shared_ptr client) { - TaskID task_id = RandomTaskId(); - std::vector node_manager_ids = {"A", "B"}; - std::vector> data_log; - for (const auto &node_manager_id : node_manager_ids) { - auto data = std::make_shared(); - data->set_node_manager_id(node_manager_id); - data_log.push_back(data); - } - - // Check that we added the correct task. - auto failure_callback = [task_id](gcs::RedisGcsClient *client, const TaskID &id, - const TaskReconstructionData &d) { - ASSERT_EQ(id, task_id); - test->IncrementNumCallbacks(); - }; - - // Will succeed. - RAY_CHECK_OK(client->task_reconstruction_log().Append(job_id, task_id, - data_log.front(), - /*done callback=*/nullptr)); - // Append at index 0 will fail. - RAY_CHECK_OK(client->task_reconstruction_log().AppendAt( - job_id, task_id, data_log[1], - /*done callback=*/nullptr, failure_callback, /*log_length=*/0)); - - // Append at index 2 will fail. - RAY_CHECK_OK(client->task_reconstruction_log().AppendAt( - job_id, task_id, data_log[1], - /*done callback=*/nullptr, failure_callback, /*log_length=*/2)); - - // Append at index 1 will succeed. - RAY_CHECK_OK(client->task_reconstruction_log().AppendAt( - job_id, task_id, data_log[1], - /*done callback=*/nullptr, failure_callback, /*log_length=*/1)); - - auto lookup_callback = [node_manager_ids]( - gcs::RedisGcsClient *client, const TaskID &id, - const std::vector &data) { - std::vector appended_managers; - for (const auto &entry : data) { - appended_managers.push_back(entry.node_manager_id()); - } - ASSERT_EQ(appended_managers, node_manager_ids); - test->Stop(); - }; - RAY_CHECK_OK( - client->task_reconstruction_log().Lookup(job_id, task_id, lookup_callback)); - // Run the event loop. The loop will only stop if the Lookup callback is - // called (or an assertion failure). - test->Start(); - ASSERT_EQ(test->NumCallbacks(), 2); - } -}; - -TEST_F(TestGcsWithAsio, TestLogLookup) { - test = this; - LogLookupTestHelper::TestLogLookup(job_id_, client_); -} - -TEST_TASK_TABLE_MACRO(TestGcsWithAsio, TestTableLookupFailure); - -TEST_F(TestGcsWithAsio, TestLogAppendAt) { - test = this; - LogLookupTestHelper::TestLogAppendAt(job_id_, client_); -} - -class SetTestHelper { - public: - static void TestSet(const JobID &job_id, std::shared_ptr client) { - // Add some entries to the set at an object ID. - ObjectID object_id = ObjectID::FromRandom(); - std::vector managers = {"abc", "def", "ghi"}; - for (auto &manager : managers) { - auto data = std::make_shared(); - data->set_manager(manager); - // Check that we added the correct object entries. - auto add_callback = [object_id, data](gcs::RedisGcsClient *client, - const ObjectID &id, - const ObjectTableData &d) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data->manager(), d.manager()); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->object_table().Add(job_id, object_id, data, add_callback)); - } - - // Check that lookup returns the added object entries. - auto lookup_callback = [object_id, managers]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector &data) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data.size(), managers.size()); - test->IncrementNumCallbacks(); - }; - - // Do a lookup at the object ID. - RAY_CHECK_OK(client->object_table().Lookup(job_id, object_id, lookup_callback)); - - for (auto &manager : managers) { - auto data = std::make_shared(); - data->set_manager(manager); - // Check that we added the correct object entries. - auto remove_entry_callback = [object_id, data](gcs::RedisGcsClient *client, - const ObjectID &id, - const ObjectTableData &d) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data->manager(), d.manager()); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK( - client->object_table().Remove(job_id, object_id, data, remove_entry_callback)); - } - - // Check that the entries are removed. - auto lookup_callback2 = [object_id, managers]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector &data) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data.size(), 0); - test->IncrementNumCallbacks(); - test->Stop(); - }; - - // Do a lookup at the object ID. - RAY_CHECK_OK(client->object_table().Lookup(job_id, object_id, lookup_callback2)); - // Run the event loop. The loop will only stop if the Lookup callback is - // called (or an assertion failure). - test->Start(); - ASSERT_EQ(test->NumCallbacks(), managers.size() * 2 + 2); - } - - static void TestDeleteKeysFromSet( - const JobID &job_id, std::shared_ptr client, - std::vector> &data_vector) { - std::vector ids; - ObjectID object_id; - for (auto &data : data_vector) { - object_id = ObjectID::FromRandom(); - ids.push_back(object_id); - // Check that we added the correct object entries. - auto add_callback = [object_id, data](gcs::RedisGcsClient *client, - const ObjectID &id, - const ObjectTableData &d) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data->manager(), d.manager()); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->object_table().Add(job_id, object_id, data, add_callback)); - } - for (const auto &object_id : ids) { - // Check that lookup returns the added object entries. - auto lookup_callback = [object_id, data_vector]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector &data) { - ASSERT_EQ(id, object_id); - ASSERT_EQ(data.size(), 1); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->object_table().Lookup(job_id, object_id, lookup_callback)); - } - if (ids.size() == 1) { - client->object_table().Delete(job_id, ids[0]); - } else { - client->object_table().Delete(job_id, ids); - } - for (const auto &object_id : ids) { - auto lookup_callback = [object_id](gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector &data) { - ASSERT_EQ(id, object_id); - ASSERT_TRUE(data.size() == 0); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->object_table().Lookup(job_id, object_id, lookup_callback)); - } - } - - static void TestSetSubscribeAll(const JobID &job_id, - std::shared_ptr client) { - std::vector object_ids; - for (int i = 0; i < 3; i++) { - object_ids.emplace_back(ObjectID::FromRandom()); - } - std::vector managers = {"abc", "def", "ghi"}; - - // Callback for a notification. - auto notification_callback = - [object_ids, managers]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector ¬ifications) { - if (test->NumCallbacks() < 3 * 3) { - ASSERT_EQ(notifications[0].GetGcsChangeMode(), GcsChangeMode::APPEND_OR_ADD); - } else { - ASSERT_EQ(notifications[0].GetGcsChangeMode(), GcsChangeMode::REMOVE); - } - ASSERT_EQ(id, object_ids[test->NumCallbacks() / 3 % 3]); - // Check that we get notifications in the same order as the writes. - for (const auto &entry : notifications[0].GetData()) { - ASSERT_EQ(entry.manager(), managers[test->NumCallbacks() % 3]); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == object_ids.size() * 3 * 2) { - test->Stop(); - } - }; - - // Callback for subscription success. We are guaranteed to receive - // notifications after this is called. - auto subscribe_callback = [job_id, object_ids, - managers](gcs::RedisGcsClient *client) { - // We have subscribed. Do the writes to the table. - for (size_t i = 0; i < object_ids.size(); i++) { - for (size_t j = 0; j < managers.size(); j++) { - auto data = std::make_shared(); - data->set_manager(managers[j]); - for (int k = 0; k < 3; k++) { - // Add the same entry several times. - // Expect no notification if the entry already exists. - RAY_CHECK_OK( - client->object_table().Add(job_id, object_ids[i], data, nullptr)); - } - } - } - for (size_t i = 0; i < object_ids.size(); i++) { - for (size_t j = 0; j < managers.size(); j++) { - auto data = std::make_shared(); - data->set_manager(managers[j]); - for (int k = 0; k < 3; k++) { - // Remove the same entry several times. - // Expect no notification if the entry doesn't exist. - RAY_CHECK_OK( - client->object_table().Remove(job_id, object_ids[i], data, nullptr)); - } - } - } - }; - - // Subscribe to all driver table notifications. Once we have successfully - // subscribed, we will append to the key several times and check that we get - // notified for each. - RAY_CHECK_OK(client->object_table().Subscribe( - job_id, NodeID::Nil(), notification_callback, subscribe_callback)); - - // Run the event loop. The loop will only stop if the registered subscription - // callback is called (or an assertion failure). - test->Start(); - // Check that we received one notification callback for each write. - ASSERT_EQ(test->NumCallbacks(), object_ids.size() * 3 * 2); - } - - static void TestSetSubscribeId(const JobID &job_id, - std::shared_ptr client) { - // Add a set entry. - ObjectID object_id1 = ObjectID::FromRandom(); - std::vector managers1 = {"abc", "def", "ghi"}; - auto data1 = std::make_shared(); - data1->set_manager(managers1[0]); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id1, data1, nullptr)); - - // Add a set entry at a second key. - ObjectID object_id2 = ObjectID::FromRandom(); - std::vector managers2 = {"jkl", "mno", "pqr"}; - auto data2 = std::make_shared(); - data2->set_manager(managers2[0]); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id2, data2, nullptr)); - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto notification_callback = - [object_id2, managers2]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector ¬ifications) { - ASSERT_EQ(notifications[0].GetGcsChangeMode(), GcsChangeMode::APPEND_OR_ADD); - // Check that we only get notifications for the requested key. - ASSERT_EQ(id, object_id2); - // Check that we get notifications in the same order as the writes. - for (const auto &entry : notifications[0].GetData()) { - ASSERT_EQ(entry.manager(), managers2[test->NumCallbacks()]); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == managers2.size()) { - test->Stop(); - } - }; - - // The callback for subscription success. Once we've subscribed, request - // notifications for only one of the keys, then write to both keys. - auto subscribe_callback = [job_id, object_id1, object_id2, managers1, - managers2](gcs::RedisGcsClient *client) { - // Request notifications for one of the keys. - RAY_CHECK_OK(client->object_table().RequestNotifications(job_id, object_id2, - local_node_id, nullptr)); - // Write both keys. We should only receive notifications for the key that - // we requested them for. - auto remaining = std::vector(++managers1.begin(), managers1.end()); - for (const auto &manager : remaining) { - auto data = std::make_shared(); - data->set_manager(manager); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id1, data, nullptr)); - } - remaining = std::vector(++managers2.begin(), managers2.end()); - for (const auto &manager : remaining) { - auto data = std::make_shared(); - data->set_manager(manager); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id2, data, nullptr)); - } - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->object_table().Subscribe( - job_id, local_node_id, notification_callback, subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that we received one notification callback for each write to the - // requested key. - ASSERT_EQ(test->NumCallbacks(), managers2.size()); - } - - static void TestSetSubscribeCancel(const JobID &job_id, - std::shared_ptr client) { - // Add a set entry. - ObjectID object_id = ObjectID::FromRandom(); - std::vector managers = {"jkl", "mno", "pqr"}; - auto data = std::make_shared(); - data->set_manager(managers[0]); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id, data, nullptr)); - - // The callback for a notification from the object table. This should only be - // received for the object that we requested notifications for. - auto notification_callback = - [object_id, managers]( - gcs::RedisGcsClient *client, const ObjectID &id, - const std::vector ¬ifications) { - ASSERT_EQ(notifications[0].GetGcsChangeMode(), GcsChangeMode::APPEND_OR_ADD); - ASSERT_EQ(id, object_id); - // Check that we get a duplicate notification for the first write. We get a - // duplicate notification because notifications - // are canceled after the first write, then requested again. - const std::vector &data = notifications[0].GetData(); - if (data.size() == 1) { - // first notification - ASSERT_EQ(data[0].manager(), managers[0]); - test->IncrementNumCallbacks(); - } else { - // second notification - ASSERT_EQ(data.size(), managers.size()); - std::unordered_set managers_set(managers.begin(), - managers.end()); - std::unordered_set data_managers_set; - for (const auto &entry : data) { - data_managers_set.insert(entry.manager()); - test->IncrementNumCallbacks(); - } - ASSERT_EQ(managers_set, data_managers_set); - } - if (test->NumCallbacks() == managers.size() + 1) { - test->Stop(); - } - }; - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto subscribe_callback = [job_id, object_id, managers](gcs::RedisGcsClient *client) { - // Request notifications, then cancel immediately. We should receive a - // notification for the current value at the key. - RAY_CHECK_OK(client->object_table().RequestNotifications(job_id, object_id, - local_node_id, nullptr)); - RAY_CHECK_OK(client->object_table().CancelNotifications(job_id, object_id, - local_node_id, nullptr)); - // Add to the key. Since we canceled notifications, we should not - // receive a notification for these writes. - auto remaining = std::vector(++managers.begin(), managers.end()); - for (const auto &manager : remaining) { - auto data = std::make_shared(); - data->set_manager(manager); - RAY_CHECK_OK(client->object_table().Add(job_id, object_id, data, nullptr)); - } - // Request notifications again. We should receive a notification for the - // current values at the key. - RAY_CHECK_OK(client->object_table().RequestNotifications(job_id, object_id, - local_node_id, nullptr)); - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->object_table().Subscribe( - job_id, local_node_id, notification_callback, subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that we received a notification callback for the first append to the - // key, then a notification for all of the appends, because we cancel - // notifications in between. - ASSERT_EQ(test->NumCallbacks(), managers.size() + 1); - } -}; - -TEST_F(TestGcsWithAsio, TestSet) { - test = this; - SetTestHelper::TestSet(job_id_, client_); -} - -class LogDeleteTestHelper { - public: - static void TestDeleteKeysFromLog( - const JobID &job_id, std::shared_ptr client, - std::vector> &data_vector) { - std::vector ids; - TaskID task_id; - for (auto &data : data_vector) { - task_id = RandomTaskId(); - ids.push_back(task_id); - // Check that we added the correct object entries. - auto add_callback = [task_id, data](gcs::RedisGcsClient *client, const TaskID &id, - const TaskReconstructionData &d) { - ASSERT_EQ(id, task_id); - ASSERT_EQ(data->node_manager_id(), d.node_manager_id()); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK( - client->task_reconstruction_log().Append(job_id, task_id, data, add_callback)); - } - for (const auto &task_id : ids) { - // Check that lookup returns the added object entries. - auto lookup_callback = [task_id, data_vector]( - gcs::RedisGcsClient *client, const TaskID &id, - const std::vector &data) { - ASSERT_EQ(id, task_id); - ASSERT_EQ(data.size(), 1); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK( - client->task_reconstruction_log().Lookup(job_id, task_id, lookup_callback)); - } - if (ids.size() == 1) { - client->task_reconstruction_log().Delete(job_id, ids[0]); - } else { - client->task_reconstruction_log().Delete(job_id, ids); - } - for (const auto &task_id : ids) { - auto lookup_callback = [task_id](gcs::RedisGcsClient *client, const TaskID &id, - const std::vector &data) { - ASSERT_EQ(id, task_id); - ASSERT_TRUE(data.size() == 0); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK( - client->task_reconstruction_log().Lookup(job_id, task_id, lookup_callback)); - } - } -}; - -// Test delete function for keys of Log or Table. -void TestDeleteKeys(const JobID &job_id, std::shared_ptr client) { - // Test delete function for keys of Log. - std::vector> task_reconstruction_vector; - auto AppendTaskReconstructionData = [&task_reconstruction_vector](size_t add_count) { - for (size_t i = 0; i < add_count; ++i) { - auto data = std::make_shared(); - data->set_node_manager_id(ObjectID::FromRandom().Hex()); - task_reconstruction_vector.push_back(data); - } - }; - // Test one element case. - AppendTaskReconstructionData(1); - ASSERT_EQ(task_reconstruction_vector.size(), 1); - LogDeleteTestHelper::TestDeleteKeysFromLog(job_id, client, task_reconstruction_vector); - // Test the case for more than one elements and less than - // maximum_gcs_deletion_batch_size. - AppendTaskReconstructionData(RayConfig::instance().maximum_gcs_deletion_batch_size() / - 2); - ASSERT_GT(task_reconstruction_vector.size(), 1); - ASSERT_LT(task_reconstruction_vector.size(), - RayConfig::instance().maximum_gcs_deletion_batch_size()); - LogDeleteTestHelper::TestDeleteKeysFromLog(job_id, client, task_reconstruction_vector); - // Test the case for more than maximum_gcs_deletion_batch_size. - // The Delete function will split the data into two commands. - AppendTaskReconstructionData(RayConfig::instance().maximum_gcs_deletion_batch_size() / - 2); - ASSERT_GT(task_reconstruction_vector.size(), - RayConfig::instance().maximum_gcs_deletion_batch_size()); - LogDeleteTestHelper::TestDeleteKeysFromLog(job_id, client, task_reconstruction_vector); - - // Test delete function for keys of Table. - std::vector> task_vector; - auto AppendTaskData = [&task_vector](size_t add_count) { - for (size_t i = 0; i < add_count; ++i) { - task_vector.push_back(TaskTableTestHelper::CreateTaskTableData(RandomTaskId())); - } - }; - AppendTaskData(1); - ASSERT_EQ(task_vector.size(), 1); - TaskTableTestHelper::TestDeleteKeysFromTable(job_id, client, task_vector, false); - - AppendTaskData(RayConfig::instance().maximum_gcs_deletion_batch_size() / 2); - ASSERT_GT(task_vector.size(), 1); - ASSERT_LT(task_vector.size(), RayConfig::instance().maximum_gcs_deletion_batch_size()); - TaskTableTestHelper::TestDeleteKeysFromTable(job_id, client, task_vector, false); - - AppendTaskData(RayConfig::instance().maximum_gcs_deletion_batch_size() / 2); - ASSERT_GT(task_vector.size(), RayConfig::instance().maximum_gcs_deletion_batch_size()); - TaskTableTestHelper::TestDeleteKeysFromTable(job_id, client, task_vector, true); - - test->Start(); - ASSERT_GT(test->NumCallbacks(), - 9 * RayConfig::instance().maximum_gcs_deletion_batch_size()); - - // Test delete function for keys of Set. - std::vector> object_vector; - auto AppendObjectData = [&object_vector](size_t add_count) { - for (size_t i = 0; i < add_count; ++i) { - auto data = std::make_shared(); - data->set_manager(ObjectID::FromRandom().Hex()); - object_vector.push_back(data); - } - }; - // Test one element case. - AppendObjectData(1); - ASSERT_EQ(object_vector.size(), 1); - SetTestHelper::TestDeleteKeysFromSet(job_id, client, object_vector); - // Test the case for more than one elements and less than - // maximum_gcs_deletion_batch_size. - AppendObjectData(RayConfig::instance().maximum_gcs_deletion_batch_size() / 2); - ASSERT_GT(object_vector.size(), 1); - ASSERT_LT(object_vector.size(), - RayConfig::instance().maximum_gcs_deletion_batch_size()); - SetTestHelper::TestDeleteKeysFromSet(job_id, client, object_vector); - // Test the case for more than maximum_gcs_deletion_batch_size. - // The Delete function will split the data into two commands. - AppendObjectData(RayConfig::instance().maximum_gcs_deletion_batch_size() / 2); - ASSERT_GT(object_vector.size(), - RayConfig::instance().maximum_gcs_deletion_batch_size()); - SetTestHelper::TestDeleteKeysFromSet(job_id, client, object_vector); -} - -TEST_F(TestGcsWithAsio, TestDeleteKey) { - test = this; - TestDeleteKeys(job_id_, client_); -} - -/// A helper class for Log Subscribe testing. -class LogSubscribeTestHelper { - public: - static void TestLogSubscribeAll(const JobID &job_id, - std::shared_ptr client) { - std::vector job_ids; - for (int i = 0; i < 3; i++) { - job_ids.emplace_back(NextJobID()); - } - // Callback for a notification. - auto notification_callback = [job_ids](gcs::RedisGcsClient *client, const JobID &id, - const std::vector data) { - ASSERT_EQ(id, job_ids[test->NumCallbacks()]); - // Check that we get notifications in the same order as the writes. - for (const auto &entry : data) { - ASSERT_EQ(entry.job_id(), job_ids[test->NumCallbacks()].Binary()); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == job_ids.size()) { - test->Stop(); - } - }; - - // Callback for subscription success. We are guaranteed to receive - // notifications after this is called. - auto subscribe_callback = [job_ids](gcs::RedisGcsClient *client) { - // We have subscribed. Do the writes to the table. - for (size_t i = 0; i < job_ids.size(); i++) { - auto job_info_ptr = CreateJobTableData(job_ids[i], false, 0, "localhost", 1); - RAY_CHECK_OK( - client->job_table().Append(job_ids[i], job_ids[i], job_info_ptr, nullptr)); - } - }; - - // Subscribe to all driver table notifications. Once we have successfully - // subscribed, we will append to the key several times and check that we get - // notified for each. - RAY_CHECK_OK(client->job_table().Subscribe( - job_id, NodeID::Nil(), notification_callback, subscribe_callback)); - - // Run the event loop. The loop will only stop if the registered subscription - // callback is called (or an assertion failure). - test->Start(); - // Check that we received one notification callback for each write. - ASSERT_EQ(test->NumCallbacks(), job_ids.size()); - } - - static void TestLogSubscribeId(const JobID &job_id, - std::shared_ptr client) { - // Add a log entry. - JobID job_id1 = NextJobID(); - std::vector job_ids1 = {"abc", "def", "ghi"}; - auto data1 = std::make_shared(); - data1->set_job_id(job_ids1[0]); - RAY_CHECK_OK(client->job_table().Append(job_id, job_id1, data1, nullptr)); - - // Add a log entry at a second key. - JobID job_id2 = NextJobID(); - std::vector job_ids2 = {"jkl", "mno", "pqr"}; - auto data2 = std::make_shared(); - data2->set_job_id(job_ids2[0]); - RAY_CHECK_OK(client->job_table().Append(job_id, job_id2, data2, nullptr)); - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto notification_callback = [job_id2, job_ids2]( - gcs::RedisGcsClient *client, const JobID &id, - const std::vector &data) { - // Check that we only get notifications for the requested key. - ASSERT_EQ(id, job_id2); - // Check that we get notifications in the same order as the writes. - for (const auto &entry : data) { - ASSERT_EQ(entry.job_id(), job_ids2[test->NumCallbacks()]); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == job_ids2.size()) { - test->Stop(); - } - }; - - // The callback for subscription success. Once we've subscribed, request - // notifications for only one of the keys, then write to both keys. - auto subscribe_callback = [job_id, job_id1, job_id2, job_ids1, - job_ids2](gcs::RedisGcsClient *client) { - // Request notifications for one of the keys. - RAY_CHECK_OK(client->job_table().RequestNotifications(job_id, job_id2, - local_node_id, nullptr)); - // Write both keys. We should only receive notifications for the key that - // we requested them for. - auto remaining = std::vector(++job_ids1.begin(), job_ids1.end()); - for (const auto &job_id_it : remaining) { - auto data = std::make_shared(); - data->set_job_id(job_id_it); - RAY_CHECK_OK(client->job_table().Append(job_id, job_id1, data, nullptr)); - } - remaining = std::vector(++job_ids2.begin(), job_ids2.end()); - for (const auto &job_id_it : remaining) { - auto data = std::make_shared(); - data->set_job_id(job_id_it); - RAY_CHECK_OK(client->job_table().Append(job_id, job_id2, data, nullptr)); - } - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->job_table().Subscribe( - job_id, local_node_id, notification_callback, subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that we received one notification callback for each write to the - // requested key. - ASSERT_EQ(test->NumCallbacks(), job_ids2.size()); - } - - static void TestLogSubscribeCancel(const JobID &job_id, - std::shared_ptr client) { - // Add a log entry. - JobID random_job_id = NextJobID(); - std::vector job_ids = {"jkl", "mno", "pqr"}; - auto data = std::make_shared(); - data->set_job_id(job_ids[0]); - RAY_CHECK_OK(client->job_table().Append(job_id, random_job_id, data, nullptr)); - - // The callback for a notification from the object table. This should only be - // received for the object that we requested notifications for. - auto notification_callback = [random_job_id, job_ids]( - gcs::RedisGcsClient *client, const JobID &id, - const std::vector &data) { - ASSERT_EQ(id, random_job_id); - // Check that we get a duplicate notification for the first write. We get a - // duplicate notification because the log is append-only and notifications - // are canceled after the first write, then requested again. - auto job_ids_copy = job_ids; - job_ids_copy.insert(job_ids_copy.begin(), job_ids_copy.front()); - for (const auto &entry : data) { - ASSERT_EQ(entry.job_id(), job_ids_copy[test->NumCallbacks()]); - test->IncrementNumCallbacks(); - } - if (test->NumCallbacks() == job_ids_copy.size()) { - test->Stop(); - } - }; - - // The callback for a notification from the table. This should only be - // received for keys that we requested notifications for. - auto subscribe_callback = [job_id, random_job_id, - job_ids](gcs::RedisGcsClient *client) { - // Request notifications, then cancel immediately. We should receive a - // notification for the current value at the key. - RAY_CHECK_OK(client->job_table().RequestNotifications(job_id, random_job_id, - local_node_id, nullptr)); - RAY_CHECK_OK(client->job_table().CancelNotifications(job_id, random_job_id, - local_node_id, nullptr)); - // Append to the key. Since we canceled notifications, we should not - // receive a notification for these writes. - auto remaining = std::vector(++job_ids.begin(), job_ids.end()); - for (const auto &remaining_job_id : remaining) { - auto data = std::make_shared(); - data->set_job_id(remaining_job_id); - RAY_CHECK_OK(client->job_table().Append(job_id, random_job_id, data, nullptr)); - } - // Request notifications again. We should receive a notification for the - // current values at the key. - RAY_CHECK_OK(client->job_table().RequestNotifications(job_id, random_job_id, - local_node_id, nullptr)); - }; - - // Subscribe to notifications for this client. This allows us to request and - // receive notifications for specific keys. - RAY_CHECK_OK(client->job_table().Subscribe( - job_id, local_node_id, notification_callback, subscribe_callback)); - // Run the event loop. The loop will only stop if the registered subscription - // callback is called for the requested key. - test->Start(); - // Check that we received a notification callback for the first append to the - // key, then a notification for all of the appends, because we cancel - // notifications in between. - ASSERT_EQ(test->NumCallbacks(), job_ids.size() + 1); - } -}; - -TEST_F(TestGcsWithAsio, TestLogSubscribeAll) { - test = this; - LogSubscribeTestHelper::TestLogSubscribeAll(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestSetSubscribeAll) { - test = this; - SetTestHelper::TestSetSubscribeAll(job_id_, client_); -} - -TEST_TASK_TABLE_MACRO(TestGcsWithAsio, TestTableSubscribeId); - -TEST_F(TestGcsWithAsio, TestLogSubscribeId) { - test = this; - LogSubscribeTestHelper::TestLogSubscribeId(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestSetSubscribeId) { - test = this; - SetTestHelper::TestSetSubscribeId(job_id_, client_); -} - -TEST_TASK_TABLE_MACRO(TestGcsWithAsio, TestTableSubscribeCancel); - -TEST_F(TestGcsWithAsio, TestLogSubscribeCancel) { - test = this; - LogSubscribeTestHelper::TestLogSubscribeCancel(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestSetSubscribeCancel) { - test = this; - SetTestHelper::TestSetSubscribeCancel(job_id_, client_); -} - -/// A helper class for NodeTable testing. -class NodeTableTestHelper { - public: - static void NodeTableNotification(std::shared_ptr client, - const NodeID &node_id, const GcsNodeInfo &data, - bool is_alive) { - NodeID added_id = local_node_id; - ASSERT_EQ(node_id, added_id); - ASSERT_EQ(NodeID::FromBinary(data.node_id()), added_id); - ASSERT_EQ(data.state() == GcsNodeInfo::ALIVE, is_alive); - - GcsNodeInfo cached_node; - ASSERT_TRUE(client->node_table().GetNode(added_id, &cached_node)); - ASSERT_EQ(NodeID::FromBinary(cached_node.node_id()), added_id); - ASSERT_EQ(cached_node.state() == GcsNodeInfo::ALIVE, is_alive); - } - - static void TestNodeTableConnect(const JobID &job_id, - std::shared_ptr client) { - // Subscribe to a node gets added and removed. The latter - // event will stop the event loop. - RAY_CHECK_OK(client->node_table().SubscribeToNodeChange( - [client](const NodeID &id, const GcsNodeInfo &data) { - // TODO(micafan) - RAY_LOG(INFO) << "Test alive=" << data.state() << " id=" << id; - if (data.state() == GcsNodeInfo::ALIVE) { - NodeTableNotification(client, id, data, true); - test->Stop(); - } - }, - nullptr)); - - // Connect and disconnect to node table. We should receive notifications - // for the addition and removal of our own entry. - GcsNodeInfo local_node_info; - local_node_info.set_node_id(local_node_id.Binary()); - local_node_info.set_node_manager_address("127.0.0.1"); - local_node_info.set_node_manager_port(0); - local_node_info.set_object_manager_port(0); - RAY_CHECK_OK(client->node_table().Connect(local_node_info)); - test->Start(); - } - - static void TestNodeTableDisconnect(const JobID &job_id, - std::shared_ptr client) { - // Register callbacks for when a node gets added and removed. The latter - // event will stop the event loop. - RAY_CHECK_OK(client->node_table().SubscribeToNodeChange( - [client](const NodeID &id, const GcsNodeInfo &data) { - if (data.state() == GcsNodeInfo::ALIVE) { - NodeTableNotification(client, id, data, /*is_insertion=*/true); - // Disconnect from the node table. We should receive a notification - // for the removal of our own entry. - RAY_CHECK_OK(client->node_table().Disconnect()); - } else { - NodeTableNotification(client, id, data, /*is_insertion=*/false); - test->Stop(); - } - }, - nullptr)); - - // Connect to the node table. We should receive notification for the - // addition of our own entry. - GcsNodeInfo local_node_info; - local_node_info.set_node_id(local_node_id.Binary()); - local_node_info.set_node_manager_address("127.0.0.1"); - local_node_info.set_node_manager_port(0); - local_node_info.set_object_manager_port(0); - RAY_CHECK_OK(client->node_table().Connect(local_node_info)); - test->Start(); - } - - static void TestNodeTableImmediateDisconnect( - const JobID &job_id, std::shared_ptr client) { - // Register callbacks for when a node gets added and removed. The latter - // event will stop the event loop. - RAY_CHECK_OK(client->node_table().SubscribeToNodeChange( - [client](const NodeID &id, const GcsNodeInfo &data) { - if (data.state() == GcsNodeInfo::ALIVE) { - NodeTableNotification(client, id, data, true); - } else { - NodeTableNotification(client, id, data, false); - test->Stop(); - } - }, - nullptr)); - // Connect to then immediately disconnect from the node table. We should - // receive notifications for the addition and removal of our own entry. - GcsNodeInfo local_node_info; - local_node_info.set_node_id(local_node_id.Binary()); - local_node_info.set_node_manager_address("127.0.0.1"); - local_node_info.set_node_manager_port(0); - local_node_info.set_object_manager_port(0); - RAY_CHECK_OK(client->node_table().Connect(local_node_info)); - RAY_CHECK_OK(client->node_table().Disconnect()); - test->Start(); - } - - static void TestNodeTableMarkDisconnected(const JobID &job_id, - std::shared_ptr client) { - GcsNodeInfo local_node_info; - local_node_info.set_node_id(local_node_id.Binary()); - local_node_info.set_node_manager_address("127.0.0.1"); - local_node_info.set_node_manager_port(0); - local_node_info.set_object_manager_port(0); - // Connect to the node table to start receiving notifications. - RAY_CHECK_OK(client->node_table().Connect(local_node_info)); - // Mark a different node as dead. - NodeID dead_node_id = NodeID::FromRandom(); - RAY_CHECK_OK(client->node_table().MarkDisconnected(dead_node_id, nullptr)); - // Make sure we only get a notification for the removal of the node we - // marked as dead. - RAY_CHECK_OK(client->node_table().SubscribeToNodeChange( - [dead_node_id](const UniqueID &id, const GcsNodeInfo &data) { - if (data.state() == GcsNodeInfo::DEAD) { - ASSERT_EQ(NodeID::FromBinary(data.node_id()), dead_node_id); - test->Stop(); - } - }, - nullptr)); - test->Start(); - } -}; - -TEST_F(TestGcsWithAsio, TestNodeTableConnect) { - test = this; - NodeTableTestHelper::TestNodeTableConnect(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestNodeTableDisconnect) { - test = this; - NodeTableTestHelper::TestNodeTableDisconnect(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestNodeTableImmediateDisconnect) { - test = this; - NodeTableTestHelper::TestNodeTableImmediateDisconnect(job_id_, client_); -} - -TEST_F(TestGcsWithAsio, TestNodeTableMarkDisconnected) { - test = this; - NodeTableTestHelper::TestNodeTableMarkDisconnected(job_id_, client_); -} - -class HashTableTestHelper { - public: - static void TestHashTable(const JobID &job_id, - std::shared_ptr client) { - uint64_t expected_count = 14; - NodeID node_id = NodeID::FromRandom(); - // Prepare the first resource map: data_map1. - DynamicResourceTable::DataMap data_map1; - auto cpu_data = std::make_shared(); - cpu_data->set_resource_capacity(100); - data_map1.emplace("CPU", cpu_data); - auto gpu_data = std::make_shared(); - gpu_data->set_resource_capacity(2); - data_map1.emplace("GPU", gpu_data); - // Prepare the second resource map: data_map2 which decreases CPU, - // increases GPU and add a new CUSTOM compared to data_map1. - DynamicResourceTable::DataMap data_map2; - auto data_cpu = std::make_shared(); - data_cpu->set_resource_capacity(50); - data_map2.emplace("CPU", data_cpu); - auto data_gpu = std::make_shared(); - data_gpu->set_resource_capacity(10); - data_map2.emplace("GPU", data_gpu); - auto data_custom = std::make_shared(); - data_custom->set_resource_capacity(2); - data_map2.emplace("CUSTOM", data_custom); - data_map2["CPU"]->set_resource_capacity(50); - // This is a common comparison function for the test. - auto compare_test = [](const DynamicResourceTable::DataMap &data1, - const DynamicResourceTable::DataMap &data2) { - ASSERT_EQ(data1.size(), data2.size()); - for (const auto &data : data1) { - auto iter = data2.find(data.first); - ASSERT_TRUE(iter != data2.end()); - ASSERT_EQ(iter->second->resource_capacity(), data.second->resource_capacity()); - } - }; - auto subscribe_callback = [](RedisGcsClient *client) { - ASSERT_TRUE(true); - test->IncrementNumCallbacks(); - }; - auto notification_callback = - [data_map1, data_map2, compare_test, expected_count]( - RedisGcsClient *client, const NodeID &id, - const std::vector &result) { - RAY_CHECK(result.size() == 1); - const ResourceChangeNotification ¬ification = result.back(); - if (notification.IsRemoved()) { - ASSERT_EQ(notification.GetData().size(), 2); - ASSERT_TRUE(notification.GetData().find("GPU") != - notification.GetData().end()); - ASSERT_TRUE( - notification.GetData().find("CUSTOM") != notification.GetData().end() || - notification.GetData().find("CPU") != notification.GetData().end()); - // The key "None-Existent" will not appear in the notification. - } else { - if (notification.GetData().size() == 2) { - compare_test(data_map1, notification.GetData()); - } else if (notification.GetData().size() == 3) { - compare_test(data_map2, notification.GetData()); - } else { - ASSERT_TRUE(false); - } - } - test->IncrementNumCallbacks(); - // It is not sure which of the notification or lookup callback will come first. - if (test->NumCallbacks() == expected_count) { - test->Stop(); - } - }; - // Step 0: Subscribe the change of the hash table. - RAY_CHECK_OK(client->resource_table().Subscribe( - job_id, NodeID::Nil(), notification_callback, subscribe_callback)); - RAY_CHECK_OK(client->resource_table().RequestNotifications(job_id, node_id, - local_node_id, nullptr)); - - // Step 1: Add elements to the hash table. - auto update_callback1 = [data_map1, compare_test]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - compare_test(data_map1, callback_data); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK( - client->resource_table().Update(job_id, node_id, data_map1, update_callback1)); - auto lookup_callback1 = [data_map1, compare_test]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - compare_test(data_map1, callback_data); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->resource_table().Lookup(job_id, node_id, lookup_callback1)); - - // Step 2: Decrease one element, increase one and add a new one. - RAY_CHECK_OK(client->resource_table().Update(job_id, node_id, data_map2, nullptr)); - auto lookup_callback2 = [data_map2, compare_test]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - compare_test(data_map2, callback_data); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->resource_table().Lookup(job_id, node_id, lookup_callback2)); - std::vector delete_keys({"GPU", "CUSTOM", "None-Existent"}); - auto remove_callback = [delete_keys](RedisGcsClient *client, const NodeID &id, - const std::vector &callback_data) { - for (size_t i = 0; i < callback_data.size(); ++i) { - // All deleting keys exist in this argument even if the key doesn't exist. - ASSERT_EQ(callback_data[i], delete_keys[i]); - } - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->resource_table().RemoveEntries(job_id, node_id, delete_keys, - remove_callback)); - DynamicResourceTable::DataMap data_map3(data_map2); - data_map3.erase("GPU"); - data_map3.erase("CUSTOM"); - auto lookup_callback3 = [data_map3, compare_test]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - compare_test(data_map3, callback_data); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->resource_table().Lookup(job_id, node_id, lookup_callback3)); - - // Step 3: Reset the the resources to data_map1. - RAY_CHECK_OK( - client->resource_table().Update(job_id, node_id, data_map1, update_callback1)); - auto lookup_callback4 = [data_map1, compare_test]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - compare_test(data_map1, callback_data); - test->IncrementNumCallbacks(); - }; - RAY_CHECK_OK(client->resource_table().Lookup(job_id, node_id, lookup_callback4)); - - // Step 4: Removing all elements will remove the home Hash table from GCS. - RAY_CHECK_OK(client->resource_table().RemoveEntries( - job_id, node_id, {"GPU", "CPU", "CUSTOM", "None-Existent"}, nullptr)); - auto lookup_callback5 = [expected_count]( - RedisGcsClient *client, const NodeID &id, - const DynamicResourceTable::DataMap &callback_data) { - ASSERT_EQ(callback_data.size(), 0); - test->IncrementNumCallbacks(); - // It is not sure which of notification or lookup callback will come first. - if (test->NumCallbacks() == expected_count) { - test->Stop(); - } - }; - RAY_CHECK_OK(client->resource_table().Lookup(job_id, node_id, lookup_callback5)); - test->Start(); - ASSERT_EQ(test->NumCallbacks(), expected_count); - } -}; - -TEST_F(TestGcsWithAsio, TestHashTable) { - test = this; - HashTableTestHelper::TestHashTable(job_id_, client_); -} - -#undef TEST_TASK_TABLE_MACRO - -} // namespace gcs -} // namespace ray - -int main(int argc, char **argv) { - InitShutdownRAII ray_log_shutdown_raii(ray::RayLog::StartRayLog, - ray::RayLog::ShutDownRayLog, argv[0], - ray::RayLogLevel::INFO, - /*log_dir=*/""); - ::testing::InitGoogleTest(&argc, argv); - RAY_CHECK(argc == 4); - ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1]; - ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2]; - ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3]; - return RUN_ALL_TESTS(); -} diff --git a/src/ray/gcs/test/redis_job_info_accessor_test.cc b/src/ray/gcs/test/redis_job_info_accessor_test.cc deleted file mode 100644 index 31dc69393..000000000 --- a/src/ray/gcs/test/redis_job_info_accessor_test.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "gtest/gtest.h" -#include "ray/common/test_util.h" -#include "ray/gcs/pb_util.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/gcs/test/accessor_test_base.h" - -namespace ray { - -namespace gcs { - -class RedisJobInfoAccessorTest : public AccessorTestBase { - protected: - virtual void GenTestData() { - for (size_t i = 0; i < total_job_number_; ++i) { - JobID job_id = JobID::FromInt(i); - std::shared_ptr job_data_ptr = - CreateJobTableData(job_id, /*is_dead*/ false, /*timestamp*/ 1, - /*driver_ip_address*/ "", /*driver_pid*/ i); - id_to_data_[job_id] = job_data_ptr; - } - } - std::atomic subscribe_pending_count_{0}; - size_t total_job_number_{100}; -}; - -TEST_F(RedisJobInfoAccessorTest, AddAndSubscribe) { - JobInfoAccessor &job_accessor = gcs_client_->Jobs(); - // SubscribeAll - auto on_subscribe = [this](const JobID &job_id, const JobTableData &data) { - const auto it = id_to_data_.find(job_id); - RAY_CHECK(it != id_to_data_.end()); - if (data.is_dead()) { - --subscribe_pending_count_; - } - }; - - auto on_done = [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - }; - - ++pending_count_; - RAY_CHECK_OK(job_accessor.AsyncSubscribeAll(on_subscribe, on_done)); - - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_); - - // Register - for (const auto &item : id_to_data_) { - ++pending_count_; - RAY_CHECK_OK(job_accessor.AsyncAdd(item.second, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_); - - // Update - for (auto &item : id_to_data_) { - ++pending_count_; - ++subscribe_pending_count_; - RAY_CHECK_OK(job_accessor.AsyncMarkFinished(item.first, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_); -} - -} // namespace gcs - -} // namespace ray - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - RAY_CHECK(argc == 4); - ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1]; - ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2]; - ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3]; - return RUN_ALL_TESTS(); -} diff --git a/src/ray/gcs/test/redis_node_info_accessor_test.cc b/src/ray/gcs/test/redis_node_info_accessor_test.cc deleted file mode 100644 index e4435184e..000000000 --- a/src/ray/gcs/test/redis_node_info_accessor_test.cc +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "gtest/gtest.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/gcs/test/accessor_test_base.h" - -namespace ray { - -namespace gcs { - -class NodeDynamicResourceTest : public AccessorTestBase { - protected: - typedef NodeResourceInfoAccessor::ResourceMap ResourceMap; - virtual void GenTestData() { - for (size_t node_index = 0; node_index < node_number_; ++node_index) { - NodeID id = NodeID::FromRandom(); - ResourceMap resource_map; - for (size_t rs_index = 0; rs_index < resource_type_number_; ++rs_index) { - std::shared_ptr rs_data = - std::make_shared(); - rs_data->set_resource_capacity(rs_index); - std::string resource_name = std::to_string(rs_index); - resource_map[resource_name] = rs_data; - if (resource_to_delete_.empty()) { - resource_to_delete_.emplace_back(resource_name); - } - } - id_to_resource_map_[id] = std::move(resource_map); - } - } - - std::unordered_map id_to_resource_map_; - - size_t node_number_{100}; - size_t resource_type_number_{5}; - - std::vector resource_to_delete_; - - std::atomic sub_pending_count_{0}; - std::atomic do_sub_pending_count_{0}; -}; - -TEST_F(NodeDynamicResourceTest, UpdateAndGet) { - NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources(); - for (const auto &node_rs : id_to_resource_map_) { - ++pending_count_; - const NodeID &id = node_rs.first; - // Update - Status status = node_resource_accessor.AsyncUpdateResources( - node_rs.first, node_rs.second, - [this, &node_resource_accessor, id](Status status) { - RAY_CHECK_OK(status); - auto get_callback = [this, id](Status status, - const boost::optional &result) { - --pending_count_; - RAY_CHECK_OK(status); - const auto it = id_to_resource_map_.find(id); - ASSERT_TRUE(result); - ASSERT_EQ(it->second.size(), result->size()); - }; - // Get - status = node_resource_accessor.AsyncGetResources(id, get_callback); - RAY_CHECK_OK(status); - }); - } - WaitPendingDone(wait_pending_timeout_); -} - -TEST_F(NodeDynamicResourceTest, Delete) { - NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources(); - for (const auto &node_rs : id_to_resource_map_) { - ++pending_count_; - // Update - Status status = node_resource_accessor.AsyncUpdateResources( - node_rs.first, node_rs.second, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - }); - } - WaitPendingDone(wait_pending_timeout_); - - for (const auto &node_rs : id_to_resource_map_) { - ++pending_count_; - const NodeID &id = node_rs.first; - // Delete - Status status = node_resource_accessor.AsyncDeleteResources( - id, resource_to_delete_, [this, &node_resource_accessor, id](Status status) { - RAY_CHECK_OK(status); - // Get - status = node_resource_accessor.AsyncGetResources( - id, [this, id](Status status, const boost::optional &result) { - --pending_count_; - RAY_CHECK_OK(status); - const auto it = id_to_resource_map_.find(id); - ASSERT_TRUE(result); - ASSERT_EQ(it->second.size() - resource_to_delete_.size(), result->size()); - }); - }); - } - WaitPendingDone(wait_pending_timeout_); -} - -TEST_F(NodeDynamicResourceTest, Subscribe) { - NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources(); - for (const auto &node_rs : id_to_resource_map_) { - ++pending_count_; - // Update - Status status = node_resource_accessor.AsyncUpdateResources( - node_rs.first, node_rs.second, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - }); - } - WaitPendingDone(wait_pending_timeout_); - - auto subscribe = [this](const rpc::NodeResourceChange ¬ification) { - auto id = NodeID::FromBinary(notification.node_id()); - RAY_LOG(INFO) << "receive client id=" << id; - auto it = id_to_resource_map_.find(id); - ASSERT_TRUE(it != id_to_resource_map_.end()); - if (0 == notification.deleted_resources_size()) { - ASSERT_EQ(notification.updated_resources_size(), it->second.size()); - } else { - ASSERT_EQ(notification.deleted_resources_size(), resource_to_delete_.size()); - } - --sub_pending_count_; - }; - - auto done = [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - }; - - // Subscribe - ++pending_count_; - Status status = node_resource_accessor.AsyncSubscribeToResources(subscribe, done); - RAY_CHECK_OK(status); - - for (const auto &node_rs : id_to_resource_map_) { - // Delete - ++pending_count_; - ++sub_pending_count_; - Status status = node_resource_accessor.AsyncDeleteResources( - node_rs.first, resource_to_delete_, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - }); - RAY_CHECK_OK(status); - } - - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(sub_pending_count_, wait_pending_timeout_); -} - -} // namespace gcs - -} // namespace ray - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - RAY_CHECK(argc == 4); - ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1]; - ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2]; - ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3]; - return RUN_ALL_TESTS(); -} diff --git a/src/ray/gcs/test/redis_object_info_accessor_test.cc b/src/ray/gcs/test/redis_object_info_accessor_test.cc deleted file mode 100644 index bbe310b97..000000000 --- a/src/ray/gcs/test/redis_object_info_accessor_test.cc +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2017 The Ray Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "gtest/gtest.h" -#include "ray/common/test_util.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_gcs_client.h" -#include "ray/gcs/test/accessor_test_base.h" - -namespace ray { - -namespace gcs { - -class RedisObjectInfoAccessorTest : public AccessorTestBase { - protected: - void GenTestData() { - for (size_t i = 0; i < object_count_; ++i) { - ObjectVector object_vec; - for (size_t j = 0; j < copy_count_; ++j) { - auto object = std::make_shared(); - NodeID node_id = NodeID::FromRandom(); - object->set_manager(node_id.Binary()); - object_vec.emplace_back(std::move(object)); - } - ObjectID id = ObjectID::FromRandom(); - object_id_to_data_[id] = object_vec; - } - } - - typedef std::vector> ObjectVector; - std::unordered_map object_id_to_data_; - - size_t object_count_{100}; - size_t copy_count_{5}; -}; - -TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) { - ObjectInfoAccessor &object_accessor = gcs_client_->Objects(); - // add && get - // add - for (const auto &elem : object_id_to_data_) { - for (const auto &item : elem.second) { - ++pending_count_; - NodeID node_id = NodeID::FromBinary(item->manager()); - RAY_CHECK_OK( - object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - })); - } - } - WaitPendingDone(wait_pending_timeout_); - // get - for (const auto &elem : object_id_to_data_) { - ++pending_count_; - size_t total_size = elem.second.size(); - RAY_CHECK_OK(object_accessor.AsyncGetLocations( - elem.first, - [this, total_size](Status status, - const boost::optional &result) { - RAY_CHECK_OK(status); - ASSERT_EQ(total_size, result->locations().size()); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - - RAY_LOG(INFO) << "Case Add && Get done."; - - // subscribe && delete - // subscribe - std::atomic sub_pending_count(0); - auto subscribe = [this, &sub_pending_count]( - const ObjectID &object_id, - const std::vector &result) { - const auto it = object_id_to_data_.find(object_id); - ASSERT_TRUE(it != object_id_to_data_.end()); - static size_t response_count = 1; - size_t cur_count = response_count <= object_count_ ? copy_count_ : 1; - ASSERT_EQ(result.size(), cur_count); - bool change_mode = response_count <= object_count_; - for (const auto &res : result) { - ASSERT_EQ(change_mode, res.is_add()); - } - ++response_count; - --sub_pending_count; - }; - for (const auto &elem : object_id_to_data_) { - ++pending_count_; - ++sub_pending_count; - RAY_CHECK_OK(object_accessor.AsyncSubscribeToLocations(elem.first, subscribe, - [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(sub_pending_count, wait_pending_timeout_); - // delete - for (const auto &elem : object_id_to_data_) { - ++pending_count_; - ++sub_pending_count; - const ObjectVector &object_vec = elem.second; - NodeID node_id = NodeID::FromBinary(object_vec[0]->manager()); - RAY_CHECK_OK( - object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) { - RAY_CHECK_OK(status); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - WaitPendingDone(sub_pending_count, wait_pending_timeout_); - // get - for (const auto &elem : object_id_to_data_) { - ++pending_count_; - size_t total_size = elem.second.size(); - RAY_CHECK_OK(object_accessor.AsyncGetLocations( - elem.first, - [this, total_size](Status status, - const boost::optional &result) { - RAY_CHECK_OK(status); - ASSERT_EQ(total_size - 1, result->locations().size()); - --pending_count_; - })); - } - WaitPendingDone(wait_pending_timeout_); - - RAY_LOG(INFO) << "Case Subscribe && Delete done."; -} - -} // namespace gcs - -} // namespace ray - -int main(int argc, char **argv) { - InitShutdownRAII ray_log_shutdown_raii(ray::RayLog::StartRayLog, - ray::RayLog::ShutDownRayLog, argv[0], - ray::RayLogLevel::INFO, - /*log_dir=*/""); - ::testing::InitGoogleTest(&argc, argv); - RAY_CHECK(argc == 4); - ray::TEST_REDIS_SERVER_EXEC_PATH = argv[1]; - ray::TEST_REDIS_CLIENT_EXEC_PATH = argv[2]; - ray::TEST_REDIS_MODULE_LIBRARY_PATH = argv[3]; - return RUN_ALL_TESTS(); -} diff --git a/src/ray/object_manager/object_directory.h b/src/ray/object_manager/object_directory.h index 7133d1e94..3ce15882b 100644 --- a/src/ray/object_manager/object_directory.h +++ b/src/ray/object_manager/object_directory.h @@ -22,7 +22,7 @@ #include "ray/common/id.h" #include "ray/common/status.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client.h" #include "ray/object_manager/format/object_manager_generated.h" namespace ray { diff --git a/src/ray/object_manager/ownership_based_object_directory.h b/src/ray/object_manager/ownership_based_object_directory.h index 68d5140b9..5b07f7999 100644 --- a/src/ray/object_manager/ownership_based_object_directory.h +++ b/src/ray/object_manager/ownership_based_object_directory.h @@ -23,7 +23,7 @@ #include "absl/container/flat_hash_map.h" #include "ray/common/id.h" #include "ray/common/status.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client.h" #include "ray/object_manager/format/object_manager_generated.h" #include "ray/object_manager/object_directory.h" #include "ray/rpc/worker/core_worker_client.h" diff --git a/src/ray/object_manager/test/object_manager_stress_test.cc b/src/ray/object_manager/test/object_manager_stress_test.cc index 83daf8297..018bc357b 100644 --- a/src/ray/object_manager/test/object_manager_stress_test.cc +++ b/src/ray/object_manager/test/object_manager_stress_test.cc @@ -20,6 +20,7 @@ #include "gtest/gtest.h" #include "ray/common/status.h" #include "ray/common/test_util.h" +#include "ray/gcs/gcs_client/service_based_gcs_client.h" #include "ray/object_manager/object_manager.h" #include "ray/util/filesystem.h" #include "src/ray/protobuf/common.pb.h" @@ -32,10 +33,24 @@ namespace ray { using rpc::GcsNodeInfo; -static inline void flushall_redis(void) { +static inline bool flushall_redis(void) { redisContext *context = redisConnect("127.0.0.1", 6379); + if (context == nullptr || context->err) { + return false; + } freeReplyObject(redisCommand(context, "FLUSHALL")); + freeReplyObject(redisCommand(context, "SET NumRedisShards 1")); + freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380")); redisFree(context); + + redisContext *shard_context = redisConnect("127.0.0.1", 6380); + if (shard_context == nullptr || shard_context->err) { + return false; + } + freeReplyObject(redisCommand(shard_context, "FLUSHALL")); + redisFree(shard_context); + + return true; } int64_t current_time_ms() { @@ -71,6 +86,7 @@ class MockServer { node_info.set_object_manager_port(object_manager_port); ray::Status status = gcs_client_->Nodes().RegisterSelf(node_info, nullptr); + std::this_thread::sleep_for(std::chrono::milliseconds(5000)); return status; } @@ -85,7 +101,7 @@ class MockServer { class TestObjectManagerBase : public ::testing::Test { public: void SetUp() { - flushall_redis(); + WaitForCondition(flushall_redis, 7000); // start store socket_name_1 = TestSetupUtil::StartObjectStore(); @@ -96,9 +112,10 @@ class TestObjectManagerBase : public ::testing::Test { int push_timeout_ms = 10000; // start first server + gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1"); gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "", - /*is_test_client=*/true); - gcs_client_1 = std::make_shared(client_options); + /*is_test_client=*/false); + gcs_client_1 = std::make_shared(client_options); RAY_CHECK_OK(gcs_client_1->Connect(main_service)); ObjectManagerConfig om_config_1; om_config_1.store_socket_name = socket_name_1; @@ -110,7 +127,7 @@ class TestObjectManagerBase : public ::testing::Test { server1.reset(new MockServer(main_service, om_config_1, gcs_client_1)); // start second server - gcs_client_2 = std::make_shared(client_options); + gcs_client_2 = std::make_shared(client_options); RAY_CHECK_OK(gcs_client_2->Connect(main_service)); ObjectManagerConfig om_config_2; om_config_2.store_socket_name = socket_name_2; @@ -139,6 +156,10 @@ class TestObjectManagerBase : public ::testing::Test { TestSetupUtil::StopObjectStore(socket_name_1); TestSetupUtil::StopObjectStore(socket_name_2); + + if (!gcs_server_socket_name_.empty()) { + TestSetupUtil::StopGcsServer(gcs_server_socket_name_); + } } ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) { @@ -172,6 +193,7 @@ class TestObjectManagerBase : public ::testing::Test { std::vector v1; std::vector v2; + std::string gcs_server_socket_name_; std::string socket_name_1; std::string socket_name_2; }; @@ -421,5 +443,6 @@ TEST_F(StressTestObjectManager, StartStressTestObjectManager) { int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); ray::TEST_STORE_EXEC_PATH = std::string(argv[1]); + ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[2]); return RUN_ALL_TESTS(); } diff --git a/src/ray/object_manager/test/object_manager_test.cc b/src/ray/object_manager/test/object_manager_test.cc index 48fa9a65a..9fbecc4ca 100644 --- a/src/ray/object_manager/test/object_manager_test.cc +++ b/src/ray/object_manager/test/object_manager_test.cc @@ -20,6 +20,7 @@ #include "gtest/gtest.h" #include "ray/common/status.h" #include "ray/common/test_util.h" +#include "ray/gcs/gcs_client/service_based_gcs_client.h" #include "ray/util/filesystem.h" #include "src/ray/protobuf/common.pb.h" @@ -38,6 +39,8 @@ using rpc::GcsNodeInfo; static inline void flushall_redis(void) { redisContext *context = redisConnect("127.0.0.1", 6379); freeReplyObject(redisCommand(context, "FLUSHALL")); + freeReplyObject(redisCommand(context, "SET NumRedisShards 1")); + freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380")); redisFree(context); } @@ -91,9 +94,10 @@ class TestObjectManagerBase : public ::testing::Test { push_timeout_ms = 1500; // start first server + gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1"); gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "", /*is_test_client=*/true); - gcs_client_1 = std::make_shared(client_options); + gcs_client_1 = std::make_shared(client_options); RAY_CHECK_OK(gcs_client_1->Connect(main_service)); ObjectManagerConfig om_config_1; om_config_1.store_socket_name = socket_name_1; @@ -105,7 +109,7 @@ class TestObjectManagerBase : public ::testing::Test { server1.reset(new MockServer(main_service, om_config_1, gcs_client_1)); // start second server - gcs_client_2 = std::make_shared(client_options); + gcs_client_2 = std::make_shared(client_options); RAY_CHECK_OK(gcs_client_2->Connect(main_service)); ObjectManagerConfig om_config_2; om_config_2.store_socket_name = socket_name_2; @@ -134,6 +138,10 @@ class TestObjectManagerBase : public ::testing::Test { TestSetupUtil::StopObjectStore(socket_name_1); TestSetupUtil::StopObjectStore(socket_name_2); + + if (!gcs_server_socket_name_.empty()) { + TestSetupUtil::StopGcsServer(gcs_server_socket_name_); + } } ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) { @@ -171,6 +179,7 @@ class TestObjectManagerBase : public ::testing::Test { std::vector v1; std::vector v2; + std::string gcs_server_socket_name_; std::string socket_name_1; std::string socket_name_2; @@ -482,5 +491,6 @@ int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); ray::TEST_STORE_EXEC_PATH = std::string(argv[1]); wait_timeout_ms = std::stoi(std::string(argv[2])); + ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[3]); return RUN_ALL_TESTS(); } diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 52e9354a2..c8dcae7f9 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -306,7 +306,7 @@ ray::Status NodeManager::RegisterGcs() { // node failure. These workers can be identified by comparing the raylet_id // in their rpc::Address to the ID of a failed raylet. const auto &worker_failure_handler = - [this](const WorkerID &id, const gcs::WorkerTableData &worker_failure_data) { + [this](const WorkerID &id, const rpc::WorkerTableData &worker_failure_data) { HandleUnexpectedWorkerFailure(worker_failure_data.worker_address()); }; RAY_CHECK_OK(gcs_client_->Workers().AsyncSubscribeToWorkerFailures( @@ -1984,8 +1984,8 @@ void NodeManager::ProcessSetResourceRequest( RAY_CHECK_OK(gcs_client_->NodeResources().AsyncDeleteResources( node_id, {resource_name}, nullptr)); } else { - std::unordered_map> data_map; - auto resource_table_data = std::make_shared(); + std::unordered_map> data_map; + auto resource_table_data = std::make_shared(); resource_table_data->set_resource_capacity(capacity); data_map.emplace(resource_name, resource_table_data); RAY_CHECK_OK( diff --git a/src/ray/raylet/raylet.cc b/src/ray/raylet/raylet.cc index 6336f3160..3a683a952 100644 --- a/src/ray/raylet/raylet.cc +++ b/src/ray/raylet/raylet.cc @@ -139,10 +139,10 @@ ray::Status Raylet::RegisterGcs() { // Add resource information. const NodeManagerConfig &node_manager_config = node_manager_.GetInitialConfig(); - std::unordered_map> resources; + std::unordered_map> resources; for (const auto &resource_pair : node_manager_config.resource_config.GetResourceMap()) { - auto resource = std::make_shared(); + auto resource = std::make_shared(); resource->set_resource_capacity(resource_pair.second); resources.emplace(resource_pair.first, resource); } diff --git a/src/ray/raylet/reconstruction_policy.h b/src/ray/raylet/reconstruction_policy.h index 2300fb1c2..e221faffe 100644 --- a/src/ray/raylet/reconstruction_policy.h +++ b/src/ray/raylet/reconstruction_policy.h @@ -20,7 +20,7 @@ #include #include "ray/common/id.h" -#include "ray/gcs/tables.h" +#include "ray/gcs/gcs_client.h" #include "ray/object_manager/object_directory.h" namespace ray { diff --git a/src/ray/raylet/reconstruction_policy_test.cc b/src/ray/raylet/reconstruction_policy_test.cc index 17b2f46d6..199e4d51e 100644 --- a/src/ray/raylet/reconstruction_policy_test.cc +++ b/src/ray/raylet/reconstruction_policy_test.cc @@ -21,9 +21,11 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "ray/gcs/callback.h" -#include "ray/gcs/redis_accessor.h" +#include "ray/gcs/gcs_client/service_based_accessor.h" +#include "ray/gcs/gcs_client/service_based_gcs_client.h" #include "ray/object_manager/object_directory.h" #include "ray/raylet/format/node_manager_generated.h" +#include "ray/raylet/reconstruction_policy.h" namespace ray { @@ -97,17 +99,18 @@ class MockObjectDirectory : public ObjectDirectoryInterface { std::unordered_map> locations_; }; -class MockNodeInfoAccessor : public gcs::RedisNodeInfoAccessor { +class MockNodeInfoAccessor : public gcs::ServiceBasedNodeInfoAccessor { public: - MockNodeInfoAccessor(gcs::RedisGcsClient *client) - : gcs::RedisNodeInfoAccessor(client) {} + MockNodeInfoAccessor(gcs::ServiceBasedGcsClient *client) + : gcs::ServiceBasedNodeInfoAccessor(client) {} bool IsRemoved(const NodeID &node_id) const override { return false; } }; -class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor { +class MockTaskInfoAccessor : public gcs::ServiceBasedTaskInfoAccessor { public: - MockTaskInfoAccessor(gcs::RedisGcsClient *client) : RedisTaskInfoAccessor(client) {} + MockTaskInfoAccessor(gcs::ServiceBasedGcsClient *client) + : ServiceBasedTaskInfoAccessor(client) {} Status AsyncSubscribeTaskLease( const TaskID &task_id, @@ -180,9 +183,9 @@ class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor { task_reconstruction_log_; }; -class MockGcs : public gcs::RedisGcsClient { +class MockGcs : public gcs::ServiceBasedGcsClient { public: - MockGcs() : gcs::RedisGcsClient(gcs::GcsClientOptions("", 0, "")){}; + MockGcs() : gcs::ServiceBasedGcsClient(gcs::GcsClientOptions("", 0, "")){}; void Init(gcs::TaskInfoAccessor *task_accessor, gcs::NodeInfoAccessor *node_accessor) { task_accessor_.reset(task_accessor); diff --git a/src/ray/raylet/task_dependency_manager.h b/src/ray/raylet/task_dependency_manager.h index d35d644e7..75654698f 100644 --- a/src/ray/raylet/task_dependency_manager.h +++ b/src/ray/raylet/task_dependency_manager.h @@ -17,7 +17,6 @@ // clang-format off #include "ray/common/id.h" #include "ray/common/task/task.h" -#include "ray/gcs/redis_gcs_client.h" #include "ray/object_manager/object_manager.h" #include "ray/raylet/reconstruction_policy.h" // clang-format on diff --git a/src/ray/raylet/task_dependency_manager_test.cc b/src/ray/raylet/task_dependency_manager_test.cc index 99f6d5622..d65b0aced 100644 --- a/src/ray/raylet/task_dependency_manager_test.cc +++ b/src/ray/raylet/task_dependency_manager_test.cc @@ -21,8 +21,6 @@ #include "gtest/gtest.h" #include "ray/common/task/task_util.h" #include "ray/common/test_util.h" -#include "ray/gcs/redis_accessor.h" -#include "ray/gcs/redis_gcs_client.h" namespace ray { diff --git a/src/ray/raylet/worker_pool.h b/src/ray/raylet/worker_pool.h index 62dfd20a5..66d4b94c7 100644 --- a/src/ray/raylet/worker_pool.h +++ b/src/ray/raylet/worker_pool.h @@ -26,7 +26,7 @@ #include "ray/common/client_connection.h" #include "ray/common/task/task.h" #include "ray/common/task/task_common.h" -#include "ray/gcs/redis_gcs_client.h" +#include "ray/gcs/gcs_client.h" #include "ray/raylet/worker.h" namespace ray { diff --git a/src/ray/test/run_object_manager_tests.sh b/src/ray/test/run_object_manager_tests.sh index 641c53050..4fba8d2dc 100755 --- a/src/ray/test/run_object_manager_tests.sh +++ b/src/ray/test/run_object_manager_tests.sh @@ -25,18 +25,22 @@ fi REDIS_MODULE="./bazel-bin/libray_redis_module.so" LOAD_MODULE_ARGS=(--loadmodule "${REDIS_MODULE}") STORE_EXEC="./bazel-bin/plasma_store_server" +GCS_SERVER_EXEC="./bazel-bin/gcs_server" # Allow cleanup commands to fail. bazel run //:redis-cli -- -p 6379 shutdown || true +bazel run //:redis-cli -- -p 6380 shutdown || true sleep 1s bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6379 & +bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6380 & sleep 1s # Run tests. -./bazel-bin/object_manager_stress_test $STORE_EXEC +./bazel-bin/object_manager_stress_test $STORE_EXEC $GCS_SERVER_EXEC sleep 1s # Use timeout=1000ms for the Wait tests. -./bazel-bin/object_manager_test $STORE_EXEC 1000 +./bazel-bin/object_manager_test $STORE_EXEC 1000 $GCS_SERVER_EXEC bazel run //:redis-cli -- -p 6379 shutdown +bazel run //:redis-cli -- -p 6380 shutdown sleep 1s # Include raylet integration test once it's ready.