[GCS] Delete redis gcs client and redis_xxx_accessor (#12996)

This commit is contained in:
DK.Pino
2020-12-23 20:31:46 +08:00
committed by GitHub
parent c4e273920f
commit 6e19facc7f
52 changed files with 131 additions and 5951 deletions
-106
View File
@@ -1020,7 +1020,6 @@ cc_test(
cc_library(
name = "gcs_test_util_lib",
hdrs = [
"src/ray/gcs/test/accessor_test_base.h",
"src/ray/gcs/test/gcs_test_util.h",
],
copts = COPTS,
@@ -1621,111 +1620,6 @@ cc_library(
],
)
# TODO(micafan) Support test groups in the future, so that all GCS tests can be run at once.
# Test target built from src/ray/gcs/test/redis_gcs_client_test.cc.
# The locations of redis-server, redis-cli and libray_redis_module.so are
# passed to the test binary as command-line arguments, in that order; the
# same three targets are listed in `data`.
cc_test(
name = "redis_gcs_client_test",
srcs = ["src/ray/gcs/test/redis_gcs_client_test.cc"],
args = [
"$(location redis-server)",
"$(location redis-cli)",
"$(location libray_redis_module.so)",
],
copts = COPTS,
data = [
"//:libray_redis_module.so",
"//:redis-cli",
"//:redis-server",
],
deps = [
":gcs",
"@com_google_googletest//:gtest_main",
],
)
# Test target built from src/ray/gcs/test/redis_actor_info_accessor_test.cc.
# The locations of redis-server, redis-cli and libray_redis_module.so are
# passed to the test binary as command-line arguments, in that order; the
# same three targets are listed in `data`.
cc_test(
name = "redis_actor_info_accessor_test",
srcs = ["src/ray/gcs/test/redis_actor_info_accessor_test.cc"],
args = [
"$(location redis-server)",
"$(location redis-cli)",
"$(location libray_redis_module.so)",
],
copts = COPTS,
data = [
"//:libray_redis_module.so",
"//:redis-cli",
"//:redis-server",
],
deps = [
":gcs",
":gcs_test_util_lib",
"@com_google_googletest//:gtest_main",
],
)
# Test target built from src/ray/gcs/test/redis_object_info_accessor_test.cc.
# The locations of redis-server, redis-cli and libray_redis_module.so are
# passed to the test binary as command-line arguments, in that order; the
# same three targets are listed in `data`.
cc_test(
name = "redis_object_info_accessor_test",
srcs = ["src/ray/gcs/test/redis_object_info_accessor_test.cc"],
args = [
"$(location redis-server)",
"$(location redis-cli)",
"$(location libray_redis_module.so)",
],
copts = COPTS,
data = [
"//:libray_redis_module.so",
"//:redis-cli",
"//:redis-server",
],
deps = [
":gcs",
":gcs_test_util_lib",
"@com_google_googletest//:gtest_main",
],
)
# Test target built from src/ray/gcs/test/redis_job_info_accessor_test.cc.
# The locations of redis-server, redis-cli and libray_redis_module.so are
# passed to the test binary as command-line arguments, in that order; the
# same three targets are listed in `data`.
cc_test(
name = "redis_job_info_accessor_test",
srcs = ["src/ray/gcs/test/redis_job_info_accessor_test.cc"],
args = [
"$(location redis-server)",
"$(location redis-cli)",
"$(location libray_redis_module.so)",
],
copts = COPTS,
data = [
"//:libray_redis_module.so",
"//:redis-cli",
"//:redis-server",
],
deps = [
":gcs",
":gcs_test_util_lib",
"@com_google_googletest//:gtest_main",
],
)
# Test target built from src/ray/gcs/test/redis_node_info_accessor_test.cc.
# The locations of redis-server, redis-cli and libray_redis_module.so are
# passed to the test binary as command-line arguments, in that order; the
# same three targets are listed in `data`.
cc_test(
name = "redis_node_info_accessor_test",
srcs = ["src/ray/gcs/test/redis_node_info_accessor_test.cc"],
args = [
"$(location redis-server)",
"$(location redis-cli)",
"$(location libray_redis_module.so)",
],
copts = COPTS,
data = [
"//:libray_redis_module.so",
"//:redis-cli",
"//:redis-server",
],
deps = [
":gcs",
":gcs_test_util_lib",
"@com_google_googletest//:gtest_main",
],
)
cc_test(
name = "asio_test",
srcs = ["src/ray/gcs/test/asio_test.cc"],
-1
View File
@@ -120,7 +120,6 @@ test_core() {
case "${OSTYPE}" in
msys)
args+=(
-//:redis_gcs_client_test
-//:core_worker_test
-//:event_test
-//:gcs_pub_sub_test
+2 -2
View File
@@ -45,7 +45,7 @@ ray::rpc::ActorHandle CreateInnerActorHandleFromString(const std::string &serial
}
ray::rpc::ActorHandle CreateInnerActorHandleFromActorTableData(
const ray::gcs::ActorTableData &actor_table_data) {
const ray::rpc::ActorTableData &actor_table_data) {
ray::rpc::ActorHandle inner;
inner.set_actor_id(actor_table_data.actor_id());
inner.set_owner_id(actor_table_data.parent_id());
@@ -80,7 +80,7 @@ ActorHandle::ActorHandle(
ActorHandle::ActorHandle(const std::string &serialized)
: ActorHandle(CreateInnerActorHandleFromString(serialized)) {}
ActorHandle::ActorHandle(const gcs::ActorTableData &actor_table_data)
ActorHandle::ActorHandle(const rpc::ActorTableData &actor_table_data)
: ActorHandle(CreateInnerActorHandleFromActorTableData(actor_table_data)) {}
void ActorHandle::SetActorTaskSpec(TaskSpecBuilder &builder, const ObjectID new_cursor) {
+1 -2
View File
@@ -20,7 +20,6 @@
#include "ray/common/task/task_util.h"
#include "ray/core_worker/common.h"
#include "ray/core_worker/context.h"
#include "ray/gcs/redis_gcs_client.h"
#include "src/ray/protobuf/core_worker.pb.h"
#include "src/ray/protobuf/gcs.pb.h"
@@ -42,7 +41,7 @@ class ActorHandle {
ActorHandle(const std::string &serialized);
/// Constructs an ActorHandle from an rpc::ActorTableData message.
ActorHandle(const gcs::ActorTableData &actor_table_data);
ActorHandle(const rpc::ActorTableData &actor_table_data);
ActorID GetActorID() const { return ActorID::FromBinary(inner_.actor_id()); };
+5 -6
View File
@@ -15,7 +15,6 @@
#include "ray/core_worker/actor_manager.h"
#include "ray/gcs/pb_util.h"
#include "ray/gcs/redis_accessor.h"
namespace ray {
@@ -124,8 +123,8 @@ void ActorManager::WaitForActorOutOfScope(
}
void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
const gcs::ActorTableData &actor_data) {
const auto &actor_state = gcs::ActorTableData::ActorState_Name(actor_data.state());
const rpc::ActorTableData &actor_data) {
const auto &actor_state = rpc::ActorTableData::ActorState_Name(actor_data.state());
RAY_LOG(INFO) << "received notification on actor, state: " << actor_state
<< ", actor_id: " << actor_id
<< ", ip address: " << actor_data.address().ip_address()
@@ -133,14 +132,14 @@ void ActorManager::HandleActorStateNotification(const ActorID &actor_id,
<< WorkerID::FromBinary(actor_data.address().worker_id())
<< ", raylet_id: " << NodeID::FromBinary(actor_data.address().raylet_id())
<< ", num_restarts: " << actor_data.num_restarts();
if (actor_data.state() == gcs::ActorTableData::RESTARTING) {
if (actor_data.state() == rpc::ActorTableData::RESTARTING) {
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), false);
} else if (actor_data.state() == gcs::ActorTableData::DEAD) {
} else if (actor_data.state() == rpc::ActorTableData::DEAD) {
direct_actor_submitter_->DisconnectActor(actor_id, actor_data.num_restarts(), true);
// We cannot erase the actor handle here because clients can still
// submit tasks to dead actors. This also means we defer unsubscription,
// otherwise we crash when bulk unsubscribing all actor handles.
} else if (actor_data.state() == gcs::ActorTableData::ALIVE) {
} else if (actor_data.state() == rpc::ActorTableData::ALIVE) {
direct_actor_submitter_->ConnectActor(actor_id, actor_data.address(),
actor_data.num_restarts());
} else {
+2 -2
View File
@@ -18,7 +18,7 @@
#include "ray/core_worker/actor_handle.h"
#include "ray/core_worker/reference_count.h"
#include "ray/core_worker/transport/direct_actor_transport.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client.h"
namespace ray {
@@ -177,7 +177,7 @@ class ActorManager {
/// \param[in] actor_id The actor id of this notification.
/// \param[in] actor_data The GCS actor data.
void HandleActorStateNotification(const ActorID &actor_id,
const gcs::ActorTableData &actor_data);
const rpc::ActorTableData &actor_data);
/// GCS client.
std::shared_ptr<gcs::GcsClient> gcs_client_;
+2 -2
View File
@@ -464,7 +464,7 @@ CoreWorker::CoreWorker(const CoreWorkerOptions &options, const WorkerID &worker_
TaskID::ComputeDriverTaskId(worker_context_.GetWorkerID()),
GetCallerId(), rpc_address_);
std::shared_ptr<gcs::TaskTableData> data = std::make_shared<gcs::TaskTableData>();
std::shared_ptr<rpc::TaskTableData> data = std::make_shared<rpc::TaskTableData>();
data->mutable_task()->mutable_task_spec()->CopyFrom(builder.Build().GetMessage());
if (!options_.is_local_mode) {
RAY_CHECK_OK(gcs_client_->Tasks().AsyncAdd(data, nullptr));
@@ -1639,7 +1639,7 @@ std::pair<const ActorHandle *, Status> CoreWorker::GetNamedActorHandle(
std::make_shared<std::promise<void>>(std::promise<void>());
RAY_CHECK_OK(gcs_client_->Actors().AsyncGetByName(
name, [this, &actor_id, name, ready_promise](
Status status, const boost::optional<gcs::ActorTableData> &result) {
Status status, const boost::optional<rpc::ActorTableData> &result) {
if (status.ok() && result) {
auto actor_handle = std::unique_ptr<ActorHandle>(new ActorHandle(*result));
actor_id = actor_handle->GetActorID();
+1 -2
View File
@@ -30,8 +30,7 @@
#include "ray/core_worker/store_provider/plasma_store_provider.h"
#include "ray/core_worker/transport/direct_actor_transport.h"
#include "ray/core_worker/transport/direct_task_transport.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/subscription_executor.h"
#include "ray/gcs/gcs_client.h"
#include "ray/raylet_client/raylet_client.h"
#include "ray/rpc/node_manager/node_manager_client.h"
#include "ray/rpc/worker/core_worker_client.h"
+1 -1
View File
@@ -18,7 +18,7 @@
#include "absl/synchronization/mutex.h"
#include "absl/time/clock.h"
#include "ray/core_worker/context.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client.h"
namespace ray {
+10 -12
View File
@@ -20,17 +20,17 @@
#include "ray/common/test_util.h"
#include "ray/core_worker/reference_count.h"
#include "ray/core_worker/transport/direct_actor_transport.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client/service_based_accessor.h"
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
namespace ray {
using ::testing::_;
class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
class MockActorInfoAccessor : public gcs::ServiceBasedActorInfoAccessor {
public:
MockActorInfoAccessor(gcs::RedisGcsClient *client)
: gcs::RedisActorInfoAccessor(client) {}
MockActorInfoAccessor(gcs::ServiceBasedGcsClient *client)
: gcs::ServiceBasedActorInfoAccessor(client) {}
~MockActorInfoAccessor() {}
@@ -44,7 +44,7 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
}
bool ActorStateNotificationPublished(const ActorID &actor_id,
const gcs::ActorTableData &actor_data) {
const rpc::ActorTableData &actor_data) {
auto it = callback_map_.find(actor_id);
if (it == callback_map_.end()) return false;
auto actor_state_notification_callback = it->second;
@@ -60,15 +60,13 @@ class MockActorInfoAccessor : public gcs::RedisActorInfoAccessor {
callback_map_;
};
class MockGcsClient : public gcs::RedisGcsClient {
class MockGcsClient : public gcs::ServiceBasedGcsClient {
public:
MockGcsClient(const gcs::GcsClientOptions &options) : gcs::RedisGcsClient(options) {}
MockGcsClient(gcs::GcsClientOptions options) : gcs::ServiceBasedGcsClient(options) {}
void Init(MockActorInfoAccessor *actor_accesor_mock) {
actor_accessor_.reset(actor_accesor_mock);
void Init(MockActorInfoAccessor *actor_info_accessor) {
actor_accessor_.reset(actor_info_accessor);
}
~MockGcsClient() {}
};
class MockDirectActorSubmitter : public CoreWorkerDirectActorTaskSubmitterInterface {
@@ -223,7 +223,6 @@ TEST_F(DirectActorSubmitterTest, TestActorDead) {
addr.set_worker_id(worker_id.Binary());
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
submitter_.AddActorQueueIfNotExists(actor_id);
gcs::ActorTableData actor_data;
submitter_.ConnectActor(actor_id, addr, 0);
ASSERT_EQ(worker_client_->callbacks.size(), 0);
@@ -256,7 +255,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartNoRetry) {
addr.set_worker_id(worker_id.Binary());
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
submitter_.AddActorQueueIfNotExists(actor_id);
gcs::ActorTableData actor_data;
addr.set_port(0);
submitter_.ConnectActor(actor_id, addr, 0);
ASSERT_EQ(worker_client_->callbacks.size(), 0);
@@ -299,7 +297,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartRetry) {
addr.set_worker_id(worker_id.Binary());
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
submitter_.AddActorQueueIfNotExists(actor_id);
gcs::ActorTableData actor_data;
addr.set_port(0);
submitter_.ConnectActor(actor_id, addr, 0);
ASSERT_EQ(worker_client_->callbacks.size(), 0);
@@ -351,7 +348,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderRetry) {
addr.set_worker_id(worker_id.Binary());
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
submitter_.AddActorQueueIfNotExists(actor_id);
gcs::ActorTableData actor_data;
addr.set_port(0);
submitter_.ConnectActor(actor_id, addr, 0);
ASSERT_EQ(worker_client_->callbacks.size(), 0);
@@ -401,7 +397,6 @@ TEST_F(DirectActorSubmitterTest, TestActorRestartOutOfOrderGcs) {
addr.set_worker_id(worker_id.Binary());
ActorID actor_id = ActorID::Of(JobID::FromInt(0), TaskID::Nil(), 0);
submitter_.AddActorQueueIfNotExists(actor_id);
gcs::ActorTableData actor_data;
addr.set_port(0);
submitter_.ConnectActor(actor_id, addr, 0);
ASSERT_EQ(worker_client_->callbacks.size(), 0);
@@ -32,7 +32,6 @@
#include "ray/core_worker/store_provider/memory_store/memory_store.h"
#include "ray/core_worker/task_manager.h"
#include "ray/core_worker/transport/dependency_resolver.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/grpc_server.h"
#include "ray/rpc/worker/core_worker_client.h"
@@ -225,7 +225,7 @@ std::vector<std::string> GlobalStateAccessor::GetAllWorkerInfo() {
}
bool GlobalStateAccessor::AddWorkerInfo(const std::string &serialized_string) {
auto data_ptr = std::make_shared<WorkerTableData>();
auto data_ptr = std::make_shared<rpc::WorkerTableData>();
data_ptr->ParseFromString(serialized_string);
std::promise<bool> promise;
RAY_CHECK_OK(
@@ -19,6 +19,8 @@
namespace ray {
namespace gcs {
using namespace ray::rpc;
ServiceBasedJobInfoAccessor::ServiceBasedJobInfoAccessor(
ServiceBasedGcsClient *client_impl)
: client_impl_(client_impl) {}
+14 -15
View File
@@ -16,7 +16,6 @@
#include "ray/common/task/task_spec.h"
#include "ray/gcs/accessor.h"
#include "ray/gcs/subscription_executor.h"
#include "ray/util/sequencer.h"
#include "src/ray/protobuf/gcs_service.pb.h"
@@ -38,12 +37,12 @@ class ServiceBasedJobInfoAccessor : public JobInfoAccessor {
virtual ~ServiceBasedJobInfoAccessor() = default;
Status AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
Status AsyncAdd(const std::shared_ptr<rpc::JobTableData> &data_ptr,
const StatusCallback &callback) override;
Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override;
Status AsyncSubscribeAll(const SubscribeCallback<JobID, JobTableData> &subscribe,
Status AsyncSubscribeAll(const SubscribeCallback<JobID, rpc::JobTableData> &subscribe,
const StatusCallback &done) override;
Status AsyncGetAll(const MultiItemCallback<rpc::JobTableData> &callback) override;
@@ -71,7 +70,7 @@ class ServiceBasedActorInfoAccessor : public ActorInfoAccessor {
virtual ~ServiceBasedActorInfoAccessor() = default;
Status GetAll(std::vector<ActorTableData> *actor_table_data_list) override;
Status GetAll(std::vector<rpc::ActorTableData> *actor_table_data_list) override;
Status AsyncGet(const ActorID &actor_id,
const OptionalItemCallback<rpc::ActorTableData> &callback) override;
@@ -136,30 +135,30 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
virtual ~ServiceBasedNodeInfoAccessor() = default;
Status RegisterSelf(const GcsNodeInfo &local_node_info,
Status RegisterSelf(const rpc::GcsNodeInfo &local_node_info,
const StatusCallback &callback) override;
Status UnregisterSelf() override;
const NodeID &GetSelfId() const override;
const GcsNodeInfo &GetSelfInfo() const override;
const rpc::GcsNodeInfo &GetSelfInfo() const override;
Status AsyncRegister(const rpc::GcsNodeInfo &node_info,
const StatusCallback &callback) override;
Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;
Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;
Status AsyncGetAll(const MultiItemCallback<rpc::GcsNodeInfo> &callback) override;
Status AsyncSubscribeToNodeChange(
const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
const SubscribeCallback<NodeID, rpc::GcsNodeInfo> &subscribe,
const StatusCallback &done) override;
boost::optional<GcsNodeInfo> Get(const NodeID &node_id,
bool filter_dead_nodes = false) const override;
boost::optional<rpc::GcsNodeInfo> Get(const NodeID &node_id,
bool filter_dead_nodes = false) const override;
const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;
const std::unordered_map<NodeID, rpc::GcsNodeInfo> &GetAll() const override;
bool IsRemoved(const NodeID &node_id) const override;
@@ -207,21 +206,21 @@ class ServiceBasedNodeInfoAccessor : public NodeInfoAccessor {
/// from a failure.
rpc::ReportResourceUsageRequest cached_resource_usage_ GUARDED_BY(mutex_);
void HandleNotification(const GcsNodeInfo &node_info);
void HandleNotification(const rpc::GcsNodeInfo &node_info);
ServiceBasedGcsClient *client_impl_;
using NodeChangeCallback =
std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;
std::function<void(const NodeID &id, const rpc::GcsNodeInfo &node_info)>;
GcsNodeInfo local_node_info_;
rpc::GcsNodeInfo local_node_info_;
NodeID local_node_id_;
/// The callback to call when a new node is added or a node is removed.
NodeChangeCallback node_change_callback_{nullptr};
/// A cache for information about all nodes.
std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
std::unordered_map<NodeID, rpc::GcsNodeInfo> node_cache_;
/// The set of removed nodes.
std::unordered_set<NodeID> removed_nodes_;
};
@@ -37,21 +37,23 @@ Status ServiceBasedGcsClient::Connect(boost::asio::io_service &io_service) {
return Status::Invalid("gcs service address is invalid!");
}
// Connect to gcs.
redis_gcs_client_.reset(new RedisGcsClient(options_));
RAY_CHECK_OK(redis_gcs_client_->Connect(io_service));
// Connect to redis.
RedisClientOptions redis_client_options(options_.server_ip_, options_.server_port_,
options_.password_, options_.is_test_client_);
redis_client_.reset(new RedisClient(redis_client_options));
RAY_CHECK_OK(redis_client_->Connect(io_service));
// Init gcs pub sub instance.
gcs_pub_sub_.reset(new GcsPubSub(redis_gcs_client_->GetRedisClient()));
gcs_pub_sub_.reset(new GcsPubSub(redis_client_));
// Get gcs service address.
get_server_address_func_ = [this](std::pair<std::string, int> *address) {
return GetGcsServerAddressFromRedis(
redis_gcs_client_->primary_context()->sync_context(), address);
redis_client_->GetPrimaryContext()->sync_context(), address);
};
std::pair<std::string, int> address;
RAY_CHECK(GetGcsServerAddressFromRedis(
redis_gcs_client_->primary_context()->sync_context(), &address,
redis_client_->GetPrimaryContext()->sync_context(), &address,
RayConfig::instance().gcs_service_connect_retries()))
<< "Failed to get gcs server address when init gcs client.";
@@ -96,8 +98,8 @@ void ServiceBasedGcsClient::Disconnect() {
is_connected_ = false;
detect_timer_->cancel();
gcs_pub_sub_.reset();
redis_gcs_client_->Disconnect();
redis_gcs_client_.reset();
redis_client_->Disconnect();
redis_client_.reset();
RAY_LOG(DEBUG) << "ServiceBasedGcsClient Disconnected.";
}
@@ -14,8 +14,9 @@
#pragma once
#include "ray/gcs/gcs_client.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/redis_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_client.h"
namespace ray {
@@ -31,8 +32,6 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient {
GcsPubSub &GetGcsPubSub() { return *gcs_pub_sub_; }
RedisGcsClient &GetRedisGcsClient() { return *redis_gcs_client_; }
rpc::GcsRpcClient &GetGcsRpcClient() { return *gcs_rpc_client_; }
private:
@@ -59,7 +58,7 @@ class RAY_EXPORT ServiceBasedGcsClient : public GcsClient {
/// Reconnect to GCS RPC server.
void ReconnectGcsServer();
std::unique_ptr<RedisGcsClient> redis_gcs_client_;
std::shared_ptr<RedisClient> redis_client_;
std::unique_ptr<GcsPubSub> gcs_pub_sub_;
@@ -24,7 +24,6 @@
#include "ray/gcs/gcs_server/gcs_init_data.h"
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
#include "ray/rpc/worker/core_worker_client.h"
#include "src/ray/protobuf/gcs_service.pb.h"
-1
View File
@@ -17,7 +17,6 @@
#include "ray/gcs/gcs_server/gcs_object_manager.h"
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
namespace ray {
@@ -18,7 +18,6 @@
#include "ray/gcs/gcs_server/gcs_node_manager.h"
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
namespace ray {
+9 -10
View File
@@ -43,23 +43,22 @@ GcsServer::~GcsServer() { Stop(); }
void GcsServer::Start() {
// Init backend client.
GcsClientOptions options(config_.redis_address, config_.redis_port,
config_.redis_password, config_.is_test);
redis_gcs_client_ = std::make_shared<RedisGcsClient>(options);
auto status = redis_gcs_client_->Connect(main_service_);
RedisClientOptions redis_client_options(config_.redis_address, config_.redis_port,
config_.redis_password, config_.is_test);
redis_client_ = std::make_shared<RedisClient>(redis_client_options);
auto status = redis_client_->Connect(main_service_);
RAY_CHECK(status.ok()) << "Failed to init redis gcs client as " << status;
// Init redis failure detector.
gcs_redis_failure_detector_ = std::make_shared<GcsRedisFailureDetector>(
main_service_, redis_gcs_client_->primary_context(), [this]() { Stop(); });
main_service_, redis_client_->GetPrimaryContext(), [this]() { Stop(); });
gcs_redis_failure_detector_->Start();
// Init gcs pub sub instance.
gcs_pub_sub_ = std::make_shared<gcs::GcsPubSub>(redis_gcs_client_->GetRedisClient());
gcs_pub_sub_ = std::make_shared<gcs::GcsPubSub>(redis_client_);
// Init gcs table storage.
gcs_table_storage_ =
std::make_shared<gcs::RedisGcsTableStorage>(redis_gcs_client_->GetRedisClient());
gcs_table_storage_ = std::make_shared<gcs::RedisGcsTableStorage>(redis_client_);
// Load gcs tables data asynchronously.
auto gcs_init_data = std::make_shared<GcsInitData>(gcs_table_storage_);
@@ -132,7 +131,7 @@ void GcsServer::Stop() {
}
void GcsServer::InitGcsNodeManager(const GcsInitData &gcs_init_data) {
RAY_CHECK(redis_gcs_client_ && gcs_table_storage_ && gcs_pub_sub_);
RAY_CHECK(redis_client_ && gcs_table_storage_ && gcs_pub_sub_);
gcs_node_manager_ =
std::make_shared<GcsNodeManager>(main_service_, gcs_pub_sub_, gcs_table_storage_);
// Initialize by gcs tables data.
@@ -255,7 +254,7 @@ void GcsServer::StoreGcsServerAddressInRedis() {
std::string address = ip + ":" + std::to_string(GetPort());
RAY_LOG(INFO) << "Gcs server address = " << address;
RAY_CHECK_OK(redis_gcs_client_->primary_context()->RunArgvAsync(
RAY_CHECK_OK(redis_client_->GetPrimaryContext()->RunArgvAsync(
{"SET", "GcsServerAddress", address}));
RAY_LOG(INFO) << "Finished setting gcs server address: " << address;
}
+2 -2
View File
@@ -21,7 +21,7 @@
#include "ray/gcs/gcs_server/gcs_resource_manager.h"
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/redis_client.h"
#include "ray/rpc/client_call.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
#include "ray/rpc/node_manager/node_manager_client_pool.h"
@@ -176,7 +176,7 @@ class GcsServer {
/// Placement Group info handler and service
std::unique_ptr<rpc::PlacementGroupInfoGrpcService> placement_group_info_service_;
/// Backend client
std::shared_ptr<RedisGcsClient> redis_gcs_client_;
std::shared_ptr<RedisClient> redis_client_;
/// A publisher for publishing gcs messages.
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
/// The gcs table storage.
@@ -16,7 +16,6 @@
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
namespace ray {
@@ -16,7 +16,6 @@
#include "ray/common/ray_config.h"
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
namespace ray {
@@ -16,7 +16,6 @@
#include "ray/gcs/gcs_server/gcs_table_storage.h"
#include "ray/gcs/pubsub/gcs_pub_sub.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
namespace ray {
@@ -86,7 +86,6 @@ class GcsObjectManagerTest : public ::testing::Test {
boost::asio::io_service io_service_;
std::shared_ptr<gcs::GcsResourceManager> gcs_resource_manager_;
std::shared_ptr<gcs::GcsNodeManager> gcs_node_manager_;
std::shared_ptr<gcs::RedisGcsClient> gcs_client_;
std::shared_ptr<gcs::GcsPubSub> gcs_pub_sub_;
std::shared_ptr<MockedGcsObjectManager> gcs_object_manager_;
std::shared_ptr<gcs::GcsTableStorage> gcs_table_storage_;
-697
View File
@@ -1,697 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ray/gcs/redis_accessor.h"
#include <boost/none.hpp>
#include "ray/gcs/pb_util.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/util/logging.h"
namespace ray {
namespace gcs {
/// Builds the accessor on top of an existing Redis GCS client.
///
/// \param client_impl The client whose log-based actor table is handed to the
/// subscription executor. The pointer is stored as given.
RedisLogBasedActorInfoAccessor::RedisLogBasedActorInfoAccessor(
    RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      // Reach the table through the constructor argument rather than through
      // `client_impl_`, so this initializer does not depend on the order in
      // which the two members are declared.
      log_based_actor_sub_executor_(client_impl->log_based_actor_table()) {}
/// Returns the actor IDs reported by the client's log-based actor table.
std::vector<ActorID> RedisLogBasedActorInfoAccessor::GetAllActorID() const {
  auto &&table = client_impl_->log_based_actor_table();
  return table.GetAllActorID();
}
/// Looks up one actor in the client's log-based actor table.
///
/// \param actor_id The actor to look up.
/// \param actor_table_data Output pointer forwarded to the table's `Get`.
/// \return The status returned by the table's `Get`.
Status RedisLogBasedActorInfoAccessor::Get(const ActorID &actor_id,
                                           ActorTableData *actor_table_data) const {
  auto &&table = client_impl_->log_based_actor_table();
  return table.Get(actor_id, actor_table_data);
}
/// Fills `actor_table_data_list` with one entry per ID from `GetAllActorID()`.
///
/// The list is resized to the number of IDs and every slot is populated via
/// `Get`, in ID order. A failing `Get` trips `RAY_CHECK_OK`.
///
/// \param actor_table_data_list Output vector; must not be null.
/// \return Always `Status::OK()` when the function returns.
Status RedisLogBasedActorInfoAccessor::GetAll(
    std::vector<ActorTableData> *actor_table_data_list) {
  RAY_CHECK(actor_table_data_list);
  const auto ids = GetAllActorID();
  auto &entries = *actor_table_data_list;
  entries.resize(ids.size());
  auto slot = entries.begin();
  for (const auto &id : ids) {
    RAY_CHECK_OK(Get(id, &*slot));
    ++slot;
  }
  return Status::OK();
}
/// Asynchronously looks up an actor in the log-based actor table.
///
/// \param actor_id The actor to look up.
/// \param callback Must not be null. Invoked with `Status::OK()` and the last
/// element of the looked-up entries, or an empty optional if there are none.
/// \return The status returned by the table's `Lookup`.
Status RedisLogBasedActorInfoAccessor::AsyncGet(
    const ActorID &actor_id, const OptionalItemCallback<ActorTableData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto on_lookup_done = [callback](RedisGcsClient * /*client*/,
                                   const ActorID & /*looked_up_id*/,
                                   const std::vector<ActorTableData> &entries) {
    boost::optional<ActorTableData> last_entry;
    if (!entries.empty()) {
      last_entry = entries.back();
    }
    callback(Status::OK(), last_entry);
  };

  auto &&table = client_impl_->log_based_actor_table();
  return table.Lookup(actor_id.JobId(), actor_id, on_lookup_done);
}
/// Not supported by this accessor: logs the message at FATAL level and, if
/// control returns, reports `Status::Invalid` with the same message.
/// Neither argument is used.
Status RedisLogBasedActorInfoAccessor::AsyncRegisterActor(
    const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) {
  const std::string unsupported_message(
      "Unsupported method of AsyncRegisterActor in RedisLogBasedActorInfoAccessor.");
  RAY_LOG(FATAL) << unsupported_message;
  return Status::Invalid(unsupported_message);
}
/// Not supported by this accessor: logs the message at FATAL level and, if
/// control returns, reports `Status::Invalid` with the same message.
/// Neither argument is used.
Status RedisLogBasedActorInfoAccessor::AsyncCreateActor(
    const ray::TaskSpecification &task_spec, const ray::gcs::StatusCallback &callback) {
  const std::string unsupported_message(
      "Unsupported method of AsyncCreateActor in "
      "RedisLogBasedActorInfoAccessor.");
  RAY_LOG(FATAL) << unsupported_message;
  return Status::Invalid(unsupported_message);
}
/// Subscribes through the log-based actor executor, passing `NodeID::Nil()`
/// as the first argument of its `AsyncSubscribeAll`.
///
/// \param subscribe Must not be null; forwarded to the executor.
/// \param done Forwarded to the executor unchanged.
/// \return The status returned by the executor.
Status RedisLogBasedActorInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  const auto &nil_node_id = NodeID::Nil();
  return log_based_actor_sub_executor_.AsyncSubscribeAll(nil_node_id, subscribe, done);
}
/// Subscribes to a single actor through the log-based actor executor, using
/// this accessor's `subscribe_id_`.
///
/// \param actor_id The actor to subscribe to.
/// \param subscribe Must not be null; forwarded to the executor.
/// \param done Forwarded to the executor unchanged.
/// \return The status returned by the executor.
Status RedisLogBasedActorInfoAccessor::AsyncSubscribe(
    const ActorID &actor_id, const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = log_based_actor_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, actor_id, subscribe, done);
}
/// Cancels the subscription for `actor_id` through the log-based actor
/// executor; no completion callback is supplied.
Status RedisLogBasedActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) {
  auto &executor = log_based_actor_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, actor_id, /*done=*/nullptr);
}
/// Builds the accessor: the log-based base class is initialized first with the
/// same client, then the executor is bound to the client's actor table.
///
/// \param client_impl The Redis GCS client backing this accessor.
RedisActorInfoAccessor::RedisActorInfoAccessor(RedisGcsClient *client_impl)
    : RedisLogBasedActorInfoAccessor(client_impl),
      // The base class stores this same pointer in `client_impl_`, so the
      // argument and the member refer to the same client here.
      actor_sub_executor_(client_impl->actor_table()) {}
/// Returns the actor IDs reported by the client's actor table.
std::vector<ActorID> RedisActorInfoAccessor::GetAllActorID() const {
  auto &&table = client_impl_->actor_table();
  return table.GetAllActorID();
}
/// Looks up one actor in the client's actor table.
///
/// \param actor_id The actor to look up.
/// \param actor_table_data Output pointer forwarded to the table's `Get`.
/// \return The status returned by the table's `Get`.
Status RedisActorInfoAccessor::Get(const ActorID &actor_id,
                                   ActorTableData *actor_table_data) const {
  auto &&table = client_impl_->actor_table();
  return table.Get(actor_id, actor_table_data);
}
/// Asynchronously looks up an actor in the client's actor table.
///
/// \param actor_id The actor to look up.
/// \param callback Must not be null. On success it receives `Status::OK()` and
/// the actor data; on failure it receives `Status::Invalid("Get actor failed.")`
/// and `boost::none`.
/// \return The status returned by the table's `Lookup`.
Status RedisActorInfoAccessor::AsyncGet(
    const ActorID &actor_id, const OptionalItemCallback<ActorTableData> &callback) {
  RAY_CHECK(callback != nullptr);
  auto on_done = [callback](RedisGcsClient *client, const ActorID &actor_id,
                            const ActorTableData &data) { callback(Status::OK(), data); };
  // `callback` has already passed the RAY_CHECK above, so the failure path
  // invokes it directly, exactly like the success path does.
  auto on_failure = [callback](RedisGcsClient *client, const ActorID &actor_id) {
    callback(Status::Invalid("Get actor failed."), boost::none);
  };
  return client_impl_->actor_table().Lookup(JobID::Nil(), actor_id, on_done, on_failure);
}
/// Asynchronously collects the data of every actor returned by
/// `GetAllActorID()`.
///
/// One `AsyncGet` is issued per actor ID. Each completion increments a shared
/// counter and appends the data when present; the completion that brings the
/// counter up to the number of IDs invokes `callback` with `Status::OK()` and
/// everything collected. With no IDs, `callback` is invoked immediately with
/// an empty vector.
///
/// \param callback Must not be null.
/// \return `Status::OK()`; a failing `AsyncGet` trips `RAY_CHECK_OK`.
Status RedisActorInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::ActorTableData> &callback) {
  RAY_CHECK(callback != nullptr);
  const auto actor_id_list = GetAllActorID();
  if (actor_id_list.empty()) {
    callback(Status::OK(), std::vector<rpc::ActorTableData>());
    return Status::OK();
  }

  // Keep the count in the container's own unsigned size type instead of
  // narrowing it to int.
  const size_t total = actor_id_list.size();
  auto finished_count = std::make_shared<size_t>(0);
  auto result = std::make_shared<std::vector<ActorTableData>>();
  result->reserve(total);

  for (const auto &actor_id : actor_id_list) {
    auto on_done = [finished_count, total, result, callback](
                       const Status &status,
                       const boost::optional<ActorTableData> &data) {
      ++(*finished_count);
      if (data) {
        result->push_back(*data);
      }
      if (*finished_count == total) {
        callback(Status::OK(), *result);
      }
    };
    RAY_CHECK_OK(AsyncGet(actor_id, on_done));
  }

  return Status::OK();
}
/// Subscribes through the actor executor, passing `NodeID::Nil()` as the first
/// argument of its `AsyncSubscribeAll`.
///
/// \param subscribe Must not be null; forwarded to the executor.
/// \param done Forwarded to the executor unchanged.
/// \return The status returned by the executor.
Status RedisActorInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  const auto &nil_node_id = NodeID::Nil();
  return actor_sub_executor_.AsyncSubscribeAll(nil_node_id, subscribe, done);
}
/// Subscribes to a single actor through the actor executor, using this
/// accessor's `subscribe_id_`.
///
/// \param actor_id The actor to subscribe to.
/// \param subscribe Must not be null; forwarded to the executor.
/// \param done Forwarded to the executor unchanged.
/// \return The status returned by the executor.
Status RedisActorInfoAccessor::AsyncSubscribe(
    const ActorID &actor_id, const SubscribeCallback<ActorID, ActorTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  auto &executor = actor_sub_executor_;
  return executor.AsyncSubscribe(subscribe_id_, actor_id, subscribe, done);
}
/// Cancels the subscription for `actor_id` through the actor executor; no
/// completion callback is supplied.
Status RedisActorInfoAccessor::AsyncUnsubscribe(const ActorID &actor_id) {
  auto &executor = actor_sub_executor_;
  return executor.AsyncUnsubscribe(subscribe_id_, actor_id, /*done=*/nullptr);
}
RedisJobInfoAccessor::RedisJobInfoAccessor(RedisGcsClient *client_impl)
: client_impl_(client_impl), job_sub_executor_(client_impl->job_table()) {}
/// Adds a job entry by delegating to `DoAsyncAppend`.
///
/// \param data_ptr The job data to append.
/// \param callback Forwarded to `DoAsyncAppend` unchanged.
/// \return The status returned by `DoAsyncAppend`.
Status RedisJobInfoAccessor::AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
                                      const StatusCallback &callback) {
  const Status append_status = DoAsyncAppend(data_ptr, callback);
  return append_status;
}
/// Appends a job entry created with `is_dead` set to true for `job_id`.
///
/// The entry is created with the current `std::time(nullptr)` timestamp, an
/// empty driver IP address and a driver pid of -1.
///
/// \param job_id The job to mark.
/// \param callback Forwarded to `DoAsyncAppend` unchanged.
/// \return The status returned by `DoAsyncAppend`.
Status RedisJobInfoAccessor::AsyncMarkFinished(const JobID &job_id,
                                               const StatusCallback &callback) {
  std::shared_ptr<JobTableData> finished_job_data = CreateJobTableData(
      job_id,
      /*is_dead*/ true,
      /*time_stamp*/ std::time(nullptr),
      /*driver_ip_address*/ "",
      /*driver_pid*/ -1);
  return DoAsyncAppend(finished_job_data, callback);
}
/// Appends `data_ptr` to the client's job table, using the job ID parsed from
/// the data as both ID arguments of `Append`.
///
/// \param data_ptr The job data to append; its `job_id()` field is read.
/// \param callback If non-null, invoked with `Status::OK()` from the table's
/// write callback. If null, no write callback is installed.
/// \return The status returned by the table's `Append`.
Status RedisJobInfoAccessor::DoAsyncAppend(const std::shared_ptr<JobTableData> &data_ptr,
                                           const StatusCallback &callback) {
  const JobID job_id = JobID::FromBinary(data_ptr->job_id());

  JobTable::WriteCallback on_appended = nullptr;
  if (callback) {
    on_appended = [callback](RedisGcsClient * /*client*/, const JobID & /*id*/,
                             const JobTableData & /*data*/) {
      callback(Status::OK());
    };
  }

  auto &&table = client_impl_->job_table();
  return table.Append(job_id, job_id, data_ptr, on_appended);
}
/// Subscribes through the job executor, passing `NodeID::Nil()` as the first
/// argument of its `AsyncSubscribeAll`.
///
/// \param subscribe Must not be null; forwarded to the executor.
/// \param done Forwarded to the executor unchanged.
/// \return The status returned by the executor.
Status RedisJobInfoAccessor::AsyncSubscribeAll(
    const SubscribeCallback<JobID, JobTableData> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  const auto &nil_node_id = NodeID::Nil();
  return job_sub_executor_.AsyncSubscribeAll(nil_node_id, subscribe, done);
}
RedisTaskInfoAccessor::RedisTaskInfoAccessor(RedisGcsClient *client_impl)
: client_impl_(client_impl),
task_sub_executor_(client_impl->raylet_task_table()),
task_lease_sub_executor_(client_impl->task_lease_table()) {}
/// Adds a task entry to the client's raylet task table.
///
/// The task ID is parsed from the task spec inside `data_ptr`, and its job ID
/// is passed as the first argument of the table's `Add`.
///
/// \param data_ptr The task data to add.
/// \param callback If non-null, invoked with `Status::OK()` from the table's
/// write callback. If null, no write callback is installed.
/// \return The status returned by the table's `Add`.
Status RedisTaskInfoAccessor::AsyncAdd(const std::shared_ptr<TaskTableData> &data_ptr,
                                       const StatusCallback &callback) {
  const TaskID task_id = TaskID::FromBinary(data_ptr->task().task_spec().task_id());

  raylet::TaskTable::WriteCallback on_written = nullptr;
  if (callback) {
    on_written = [callback](RedisGcsClient * /*client*/, const TaskID & /*id*/,
                            const TaskTableData & /*data*/) {
      callback(Status::OK());
    };
  }

  raylet::TaskTable &table = client_impl_->raylet_task_table();
  return table.Add(task_id.JobId(), task_id, data_ptr, on_written);
}
/// Look up a task record in the raylet task table.
///
/// On a successful lookup `callback` receives Status::OK() and the record. On
/// a failed lookup it receives Status::Invalid("Task not exist.") and an empty
/// optional.
///
/// \param task_id ID of the task to look up.
/// \param callback Result callback; must not be null (enforced with RAY_CHECK).
/// \return Status returned by the task table's Lookup().
Status RedisTaskInfoAccessor::AsyncGet(
    const TaskID &task_id, const OptionalItemCallback<TaskTableData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto deliver_found = [callback](RedisGcsClient *, const TaskID &,
                                  const TaskTableData &found) {
    callback(Status::OK(), boost::optional<TaskTableData>(found));
  };
  auto deliver_missing = [callback](RedisGcsClient *, const TaskID &) {
    callback(Status::Invalid("Task not exist."), boost::optional<TaskTableData>());
  };

  return client_impl_->raylet_task_table().Lookup(task_id.JobId(), task_id,
                                                  deliver_found, deliver_missing);
}
/// Subscribe to updates of a single task.
///
/// The subscription is registered on `task_sub_executor_` under this
/// accessor's `subscribe_id_`.
///
/// \param task_id ID of the task to watch.
/// \param subscribe Callback invoked on task updates; must not be null
/// (enforced with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisTaskInfoAccessor::AsyncSubscribe(
    const TaskID &task_id,
    const SubscribeCallback<TaskID, TaskTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  Status subscribe_status =
      task_sub_executor_.AsyncSubscribe(subscribe_id_, task_id, subscribe, done);
  return subscribe_status;
}
/// Cancel the subscription registered for `task_id`.
///
/// Forwarded to `task_sub_executor_` under this accessor's `subscribe_id_`;
/// no completion callback is passed along.
///
/// \param task_id ID of the task whose subscription is cancelled.
/// \return Status returned by the subscription executor.
Status RedisTaskInfoAccessor::AsyncUnsubscribe(const TaskID &task_id) {
  return task_sub_executor_.AsyncUnsubscribe(subscribe_id_, task_id,
                                             /*no completion callback*/ nullptr);
}
/// Add a task lease record to the task lease table.
///
/// The task ID is decoded from the lease record. When `callback` is set, it
/// is invoked with Status::OK() from the table's write callback.
///
/// \param data_ptr The lease record to store.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the task lease table's Add().
Status RedisTaskInfoAccessor::AsyncAddTaskLease(
    const std::shared_ptr<TaskLeaseData> &data_ptr, const StatusCallback &callback) {
  const TaskID leased_task = TaskID::FromBinary(data_ptr->task_id());

  TaskLeaseTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const TaskID &, const TaskLeaseData &) {
      callback(Status::OK());
    };
  }

  return client_impl_->task_lease_table().Add(leased_task.JobId(), leased_task, data_ptr,
                                              write_done);
}
/// Look up the lease record of a task.
///
/// On a successful lookup `callback` receives Status::OK() and the lease. On
/// a failed lookup it receives Status::Invalid("Task lease not exist.") and an
/// empty optional.
///
/// \param task_id ID of the task whose lease is requested.
/// \param callback Result callback; must not be null (enforced with RAY_CHECK).
/// \return Status returned by the task lease table's Lookup().
Status RedisTaskInfoAccessor::AsyncGetTaskLease(
    const TaskID &task_id, const OptionalItemCallback<TaskLeaseData> &callback) {
  RAY_CHECK(callback != nullptr);

  auto deliver_found = [callback](RedisGcsClient *, const TaskID &,
                                  const TaskLeaseData &lease) {
    callback(Status::OK(), boost::optional<TaskLeaseData>(lease));
  };
  auto deliver_missing = [callback](RedisGcsClient *, const TaskID &) {
    callback(Status::Invalid("Task lease not exist."),
             boost::optional<TaskLeaseData>());
  };

  return client_impl_->task_lease_table().Lookup(task_id.JobId(), task_id, deliver_found,
                                                 deliver_missing);
}
/// Subscribe to lease updates of a single task.
///
/// The subscription is registered on `task_lease_sub_executor_` under this
/// accessor's `subscribe_id_`.
///
/// \param task_id ID of the task whose lease is watched.
/// \param subscribe Callback invoked on lease updates; must not be null
/// (enforced with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisTaskInfoAccessor::AsyncSubscribeTaskLease(
    const TaskID &task_id,
    const SubscribeCallback<TaskID, boost::optional<TaskLeaseData>> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  Status subscribe_status =
      task_lease_sub_executor_.AsyncSubscribe(subscribe_id_, task_id, subscribe, done);
  return subscribe_status;
}
/// Cancel the lease subscription registered for `task_id`.
///
/// Forwarded to `task_lease_sub_executor_` under this accessor's
/// `subscribe_id_`; no completion callback is passed along.
///
/// \param task_id ID of the task whose lease subscription is cancelled.
/// \return Status returned by the subscription executor.
Status RedisTaskInfoAccessor::AsyncUnsubscribeTaskLease(const TaskID &task_id) {
  return task_lease_sub_executor_.AsyncUnsubscribe(subscribe_id_, task_id,
                                                   /*no completion callback*/ nullptr);
}
/// Try to append a reconstruction entry for a task.
///
/// The entry is appended to the task reconstruction log at the index given by
/// the entry's own `num_reconstructions()` value. When `callback` is set, it
/// receives Status::OK() from the log's success callback, or
/// Status::Invalid("Updating task reconstruction failed.") from its failure
/// callback.
///
/// \param data_ptr The reconstruction entry to append.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the log's AppendAt().
Status RedisTaskInfoAccessor::AttemptTaskReconstruction(
    const std::shared_ptr<TaskReconstructionData> &data_ptr,
    const StatusCallback &callback) {
  const TaskID rebuilt_task = TaskID::FromBinary(data_ptr->task_id());
  const int attempt_index = data_ptr->num_reconstructions();

  TaskReconstructionLog::WriteCallback append_ok = nullptr;
  TaskReconstructionLog::WriteCallback append_failed = nullptr;
  if (callback != nullptr) {
    append_ok = [callback](RedisGcsClient *, const TaskID &,
                           const TaskReconstructionData &) { callback(Status::OK()); };
    append_failed = [callback](RedisGcsClient *, const TaskID &,
                               const TaskReconstructionData &) {
      callback(Status::Invalid("Updating task reconstruction failed."));
    };
  }

  return client_impl_->task_reconstruction_log().AppendAt(
      rebuilt_task.JobId(), rebuilt_task, data_ptr, append_ok, append_failed,
      attempt_index);
}
/// Construct an object accessor on top of `client_impl`.
///
/// The object subscription executor is bound to the client's object table.
///
/// \param client_impl The Redis GCS client this accessor operates through.
RedisObjectInfoAccessor::RedisObjectInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      object_sub_executor_(client_impl->object_table()) {}
/// Look up all known locations of an object.
///
/// The entries returned by the object table are copied into one
/// rpc::ObjectLocationInfo, tagged with the object's binary ID, which is then
/// passed to `callback` together with Status::OK().
///
/// \param object_id ID of the object to look up.
/// \param callback Result callback; must not be null (enforced with RAY_CHECK).
/// \return Status returned by the object table's Lookup().
Status RedisObjectInfoAccessor::AsyncGetLocations(
    const ObjectID &object_id,
    const OptionalItemCallback<rpc::ObjectLocationInfo> &callback) {
  RAY_CHECK(callback != nullptr);

  auto collect_locations = [callback](RedisGcsClient *, const ObjectID &id,
                                      const std::vector<ObjectTableData> &entries) {
    rpc::ObjectLocationInfo location_info;
    location_info.set_object_id(id.Binary());
    for (const auto &entry : entries) {
      location_info.add_locations()->CopyFrom(entry);
    }
    callback(Status::OK(), location_info);
  };

  return client_impl_->object_table().Lookup(object_id.TaskId().JobId(), object_id,
                                             collect_locations);
}
/// Record that `node_id` holds a copy of `object_id`.
///
/// A fresh ObjectTableData whose manager field is the node's binary ID is
/// added to the object table. When `callback` is set, it is invoked with
/// Status::OK() from the table's completion callback.
///
/// \param object_id ID of the object.
/// \param node_id ID of the node that now holds the object.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the object table's Add().
Status RedisObjectInfoAccessor::AsyncAddLocation(const ObjectID &object_id,
                                                 const NodeID &node_id,
                                                 const StatusCallback &callback) {
  using LocationWriteCallback = std::function<void(
      RedisGcsClient * client, const ObjectID &id, const ObjectTableData &data)>;

  auto location = std::make_shared<ObjectTableData>();
  location->set_manager(node_id.Binary());

  LocationWriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const ObjectID &,
                            const ObjectTableData &) { callback(Status::OK()); };
  }

  return client_impl_->object_table().Add(object_id.TaskId().JobId(), object_id,
                                          location, write_done);
}
/// Record that `node_id` no longer holds a copy of `object_id`.
///
/// A fresh ObjectTableData whose manager field is the node's binary ID is
/// passed to the object table's Remove(). When `callback` is set, it is
/// invoked with Status::OK() from the table's completion callback.
///
/// \param object_id ID of the object.
/// \param node_id ID of the node that dropped the object.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the object table's Remove().
Status RedisObjectInfoAccessor::AsyncRemoveLocation(const ObjectID &object_id,
                                                    const NodeID &node_id,
                                                    const StatusCallback &callback) {
  using LocationWriteCallback = std::function<void(
      RedisGcsClient * client, const ObjectID &id, const ObjectTableData &data)>;

  auto location = std::make_shared<ObjectTableData>();
  location->set_manager(node_id.Binary());

  LocationWriteCallback remove_done = nullptr;
  if (callback != nullptr) {
    remove_done = [callback](RedisGcsClient *, const ObjectID &,
                             const ObjectTableData &) { callback(Status::OK()); };
  }

  return client_impl_->object_table().Remove(object_id.TaskId().JobId(), object_id,
                                             location, remove_done);
}
/// Subscribe to location changes of a single object.
///
/// Each notification from the object table is translated into a list of
/// rpc::ObjectLocationChange entries, one per item in the notification. Every
/// entry carries the notification's added/removed flag and the item's manager
/// as node ID. The list is then handed to `subscribe`.
///
/// \param object_id ID of the object to watch.
/// \param subscribe Callback receiving the translated changes; must not be
/// null (enforced with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisObjectInfoAccessor::AsyncSubscribeToLocations(
    const ObjectID &object_id,
    const SubscribeCallback<ObjectID, std::vector<rpc::ObjectLocationChange>> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);

  auto translate_notification = [subscribe](const ObjectID &id,
                                            const ObjectChangeNotification &notice) {
    const bool is_add = notice.IsAdded();
    std::vector<rpc::ObjectLocationChange> changes;
    for (const auto &entry : notice.GetData()) {
      changes.emplace_back();
      rpc::ObjectLocationChange &change = changes.back();
      change.set_is_add(is_add);
      change.set_node_id(entry.manager());
    }
    subscribe(id, changes);
  };

  return object_sub_executor_.AsyncSubscribe(subscribe_id_, object_id,
                                             translate_notification, done);
}
/// Cancel the location subscription registered for `object_id`.
///
/// Forwarded to `object_sub_executor_` under this accessor's `subscribe_id_`;
/// no completion callback is passed along.
///
/// \param object_id ID of the object whose subscription is cancelled.
/// \return Status returned by the subscription executor.
Status RedisObjectInfoAccessor::AsyncUnsubscribeToLocations(const ObjectID &object_id) {
  return object_sub_executor_.AsyncUnsubscribe(subscribe_id_, object_id,
                                               /*no completion callback*/ nullptr);
}
/// Construct a node accessor on top of `client_impl`.
///
/// The batched resource usage subscription executor is bound to the client's
/// resource usage batch table.
///
/// \param client_impl The Redis GCS client this accessor operates through.
RedisNodeInfoAccessor::RedisNodeInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      resource_usage_batch_sub_executor_(client_impl->resource_usage_batch_table()) {
}
/// Register the local node by connecting it through the node table.
///
/// The connect call runs synchronously. When `callback` is set, it is invoked
/// before this function returns and receives the same status that is
/// returned, so a failed connect is no longer reported to the callback as
/// Status::OK().
///
/// \param local_node_info Description of the local node to register.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the node table's Connect().
Status RedisNodeInfoAccessor::RegisterSelf(const GcsNodeInfo &local_node_info,
                                           const StatusCallback &callback) {
  NodeTable &node_table = client_impl_->node_table();
  Status status = node_table.Connect(local_node_info);
  if (callback != nullptr) {
    // Report the real outcome rather than an unconditional OK.
    callback(status);
  }
  return status;
}
/// Unregister the local node by disconnecting it through the node table.
///
/// \return Status returned by the node table's Disconnect().
Status RedisNodeInfoAccessor::UnregisterSelf() {
  return client_impl_->node_table().Disconnect();
}
/// Get the ID of the local node as reported by the node table.
///
/// \return Reference obtained from the node table's GetLocalNodeId().
const NodeID &RedisNodeInfoAccessor::GetSelfId() const {
  return client_impl_->node_table().GetLocalNodeId();
}
/// Get the description of the local node as reported by the node table.
///
/// \return Reference obtained from the node table's GetLocalNode().
const GcsNodeInfo &RedisNodeInfoAccessor::GetSelfInfo() const {
  return client_impl_->node_table().GetLocalNode();
}
/// Mark a node as connected in the node table.
///
/// When `callback` is set, it is invoked with Status::OK() from the table's
/// write callback.
///
/// \param node_info Description of the node to register.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the node table's MarkConnected().
Status RedisNodeInfoAccessor::AsyncRegister(const GcsNodeInfo &node_info,
                                            const StatusCallback &callback) {
  NodeTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const NodeID &, const GcsNodeInfo &) {
      callback(Status::OK());
    };
  }
  return client_impl_->node_table().MarkConnected(node_info, write_done);
}
/// Mark a node as disconnected in the node table.
///
/// When `callback` is set, it is invoked with Status::OK() from the table's
/// write callback.
///
/// \param node_id ID of the node to unregister.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the node table's MarkDisconnected().
Status RedisNodeInfoAccessor::AsyncUnregister(const NodeID &node_id,
                                              const StatusCallback &callback) {
  NodeTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const NodeID &, const GcsNodeInfo &) {
      callback(Status::OK());
    };
  }
  return client_impl_->node_table().MarkDisconnected(node_id, write_done);
}
/// Subscribe to node changes through the node table.
///
/// \param subscribe Callback invoked on node changes; must not be null
/// (enforced with RAY_CHECK).
/// \param done Callback forwarded to the node table.
/// \return Status returned by the node table's SubscribeToNodeChange().
Status RedisNodeInfoAccessor::AsyncSubscribeToNodeChange(
    const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  return client_impl_->node_table().SubscribeToNodeChange(subscribe, done);
}
/// Fetch all node entries, keeping one entry per node ID.
///
/// The entries returned by the node table are walked from last to first, and
/// only the first entry seen for each node ID is kept. The result therefore
/// holds, for every node, the entry nearest the end of the returned list, in
/// reverse order of appearance. `callback` receives Status::OK() and that
/// result.
///
/// \param callback Result callback; must not be null (enforced with RAY_CHECK).
/// \return Status returned by the node table's Lookup().
Status RedisNodeInfoAccessor::AsyncGetAll(
    const MultiItemCallback<GcsNodeInfo> &callback) {
  RAY_CHECK(callback != nullptr);

  auto keep_last_entry_per_node = [callback](RedisGcsClient *, const NodeID &,
                                             const std::vector<GcsNodeInfo> &entries) {
    std::vector<GcsNodeInfo> unique_nodes;
    std::set<std::string> seen_node_ids;
    for (auto entry = entries.rbegin(); entry != entries.rend(); ++entry) {
      const bool first_time_seen = seen_node_ids.insert(entry->node_id()).second;
      if (first_time_seen) {
        unique_nodes.emplace_back(*entry);
      }
    }
    callback(Status::OK(), unique_nodes);
  };

  return client_impl_->node_table().Lookup(keep_last_entry_per_node);
}
/// Look up a node in the node table.
///
/// `filter_dead_nodes` is accepted but never read by this implementation; the
/// result depends only on what the node table's GetNode() reports.
///
/// \param node_id ID of the node to look up.
/// \param filter_dead_nodes Not used here.
/// \return The node's description when GetNode() finds it, otherwise an empty
/// optional.
boost::optional<GcsNodeInfo> RedisNodeInfoAccessor::Get(const NodeID &node_id,
                                                        bool filter_dead_nodes) const {
  GcsNodeInfo fetched_node;
  boost::optional<GcsNodeInfo> lookup_result;
  if (client_impl_->node_table().GetNode(node_id, &fetched_node)) {
    lookup_result = std::move(fetched_node);
  }
  return lookup_result;
}
/// Get every node the node table currently reports.
///
/// \return Reference obtained from the node table's GetAllNodes().
const std::unordered_map<NodeID, GcsNodeInfo> &RedisNodeInfoAccessor::GetAll() const {
  return client_impl_->node_table().GetAllNodes();
}
/// Ask the node table whether a node has been removed.
///
/// \param node_id ID of the node to check.
/// \return Whatever the node table's IsRemoved() reports for `node_id`.
bool RedisNodeInfoAccessor::IsRemoved(const NodeID &node_id) const {
  return client_impl_->node_table().IsRemoved(node_id);
}
/// Add a heartbeat record to the heartbeat table.
///
/// The node ID is decoded from the heartbeat record, and the record is added
/// with JobID::Nil(). When `callback` is set, it is invoked with Status::OK()
/// from the table's write callback.
///
/// \param data_ptr The heartbeat record to store.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the heartbeat table's Add().
Status RedisNodeInfoAccessor::AsyncReportHeartbeat(
    const std::shared_ptr<HeartbeatTableData> &data_ptr, const StatusCallback &callback) {
  const NodeID reporting_node = NodeID::FromBinary(data_ptr->node_id());

  HeartbeatTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const NodeID &,
                            const HeartbeatTableData &) { callback(Status::OK()); };
  }

  return client_impl_->heartbeat_table().Add(JobID::Nil(), reporting_node, data_ptr,
                                             write_done);
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisNodeInfoAccessor::AsyncReportResourceUsage(
    const std::shared_ptr<rpc::ResourcesData> &data_ptr,
    const StatusCallback &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// No-op on the Redis-backed accessor.
void RedisNodeInfoAccessor::AsyncReReportResourceUsage() {
  // Intentionally empty.
}
/// Subscribe to batched resource usage updates.
///
/// The subscription is registered on `resource_usage_batch_sub_executor_`
/// with NodeID::Nil(). The node ID delivered with each notification is
/// dropped; only the batch data is handed to `subscribe`.
///
/// \param subscribe Callback receiving each batch; must not be null (enforced
/// with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisNodeInfoAccessor::AsyncSubscribeBatchedResourceUsage(
    const ItemCallback<ResourceUsageBatchData> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  return resource_usage_batch_sub_executor_.AsyncSubscribeAll(
      NodeID::Nil(),
      [subscribe](const NodeID &, const ResourceUsageBatchData &batch) {
        subscribe(batch);
      },
      done);
}
/// Construct a node resource accessor on top of `client_impl`.
///
/// The resource subscription executor is bound to the client's resource
/// table. The table is reached through the constructor parameter rather than
/// the `client_impl_` member, so the initializer does not depend on the
/// declaration order of the members.
///
/// \param client_impl The Redis GCS client this accessor operates through.
RedisNodeResourceInfoAccessor::RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl), resource_sub_executor_(client_impl->resource_table()) {}
/// Look up the resources of a node.
///
/// `callback` always receives Status::OK(). The optional it receives is empty
/// when the resource table returns an empty map, and holds a copy of the map
/// otherwise.
///
/// \param node_id ID of the node whose resources are requested.
/// \param callback Result callback; must not be null (enforced with RAY_CHECK).
/// \return Status returned by the resource table's Lookup().
Status RedisNodeResourceInfoAccessor::AsyncGetResources(
    const NodeID &node_id, const OptionalItemCallback<ResourceMap> &callback) {
  RAY_CHECK(callback != nullptr);

  auto deliver_resources = [callback](RedisGcsClient *, const NodeID &,
                                      const ResourceMap &resources) {
    callback(Status::OK(), resources.empty()
                               ? boost::optional<ResourceMap>()
                               : boost::optional<ResourceMap>(resources));
  };

  return client_impl_->resource_table().Lookup(JobID::Nil(), node_id, deliver_resources);
}
/// Update resources of a node in the resource table.
///
/// The update is issued with JobID::Nil(). When `callback` is set, it is
/// invoked with Status::OK() from the table's completion callback.
///
/// \param node_id ID of the node whose resources are updated.
/// \param resources The resources to write.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the resource table's Update().
Status RedisNodeResourceInfoAccessor::AsyncUpdateResources(
    const NodeID &node_id, const ResourceMap &resources, const StatusCallback &callback) {
  Hash<NodeID, ResourceTableData>::HashCallback update_done = nullptr;
  if (callback != nullptr) {
    update_done = [callback](RedisGcsClient *, const NodeID &, const ResourceMap &) {
      callback(Status::OK());
    };
  }
  return client_impl_->resource_table().Update(JobID::Nil(), node_id, resources,
                                               update_done);
}
/// Remove named resources of a node from the resource table.
///
/// The removal is issued with JobID::Nil(). When `callback` is set, it is
/// invoked with Status::OK() from the table's completion callback.
///
/// \param node_id ID of the node whose resources are removed.
/// \param resource_names Names of the resources to remove.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the resource table's RemoveEntries().
Status RedisNodeResourceInfoAccessor::AsyncDeleteResources(
    const NodeID &node_id, const std::vector<std::string> &resource_names,
    const StatusCallback &callback) {
  Hash<NodeID, ResourceTableData>::HashRemoveCallback remove_done = nullptr;
  if (callback != nullptr) {
    remove_done = [callback](RedisGcsClient *, const NodeID &,
                             const std::vector<std::string> &) {
      callback(Status::OK());
    };
  }
  return client_impl_->resource_table().RemoveEntries(JobID::Nil(), node_id,
                                                      resource_names, remove_done);
}
/// Subscribe to resource changes of all nodes.
///
/// Each notification is translated into an rpc::NodeResourceChange tagged with
/// the node's binary ID. For an "added" notification every entry becomes an
/// updated resource (name -> capacity); otherwise every entry's name is listed
/// as a deleted resource. The subscription is registered with NodeID::Nil().
///
/// \param subscribe Callback receiving each translated change; must not be
/// null (enforced with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisNodeResourceInfoAccessor::AsyncSubscribeToResources(
    const ItemCallback<rpc::NodeResourceChange> &subscribe, const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);

  auto translate_notification = [subscribe](const NodeID &id,
                                            const ResourceChangeNotification &notice) {
    rpc::NodeResourceChange change;
    change.set_node_id(id.Binary());
    if (!notice.IsAdded()) {
      for (const auto &entry : notice.GetData()) {
        change.add_deleted_resources(entry.first);
      }
    } else {
      auto &updated = *change.mutable_updated_resources();
      for (const auto &entry : notice.GetData()) {
        updated[entry.first] = entry.second->resource_capacity();
      }
    }
    subscribe(change);
  };

  return resource_sub_executor_.AsyncSubscribeAll(NodeID::Nil(), translate_notification,
                                                  done);
}
/// Construct an error info accessor.
///
/// `client_impl` is accepted but not stored or used by this constructor.
RedisErrorInfoAccessor::RedisErrorInfoAccessor(RedisGcsClient *client_impl) {
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisErrorInfoAccessor::AsyncReportJobError(
    const std::shared_ptr<ErrorTableData> &data_ptr, const StatusCallback &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Construct a stats accessor that operates through `client_impl`.
///
/// \param client_impl The Redis GCS client; the pointer is stored as-is.
RedisStatsInfoAccessor::RedisStatsInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl) {
}
/// Append a profile record to the profile table.
///
/// The record is appended with JobID::Nil() under a freshly generated random
/// UniqueID. When `callback` is set, it is invoked with Status::OK() from the
/// table's write callback.
///
/// \param data_ptr The profile record to append.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the profile table's Append().
Status RedisStatsInfoAccessor::AsyncAddProfileData(
    const std::shared_ptr<ProfileTableData> &data_ptr, const StatusCallback &callback) {
  ProfileTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const UniqueID &,
                            const ProfileTableData &) { callback(Status::OK()); };
  }
  return client_impl_->profile_table().Append(JobID::Nil(), UniqueID::FromRandom(),
                                              data_ptr, write_done);
}
/// Construct a worker accessor on top of `client_impl`.
///
/// The worker failure subscription executor is bound to the client's worker
/// table.
///
/// \param client_impl The Redis GCS client this accessor operates through.
RedisWorkerInfoAccessor::RedisWorkerInfoAccessor(RedisGcsClient *client_impl)
    : client_impl_(client_impl),
      worker_failure_sub_executor_(client_impl->worker_table()) {
}
/// Subscribe to worker failure notifications.
///
/// The subscription is registered on `worker_failure_sub_executor_` with
/// NodeID::Nil().
///
/// \param subscribe Callback invoked for each notification; must not be null
/// (enforced with RAY_CHECK).
/// \param done Callback forwarded to the subscription executor.
/// \return Status returned by the subscription executor.
Status RedisWorkerInfoAccessor::AsyncSubscribeToWorkerFailures(
    const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
    const StatusCallback &done) {
  RAY_CHECK(subscribe != nullptr);
  const NodeID all_nodes = NodeID::Nil();
  return worker_failure_sub_executor_.AsyncSubscribeAll(all_nodes, subscribe, done);
}
/// Add a worker failure record to the worker table.
///
/// The worker ID is decoded from the record's worker address, and the record
/// is added with JobID::Nil(). When `callback` is set, it is invoked with
/// Status::OK() from the table's write callback.
///
/// \param data_ptr The worker failure record to store.
/// \param callback Optional completion callback; may be nullptr.
/// \return Status returned by the worker table's Add().
Status RedisWorkerInfoAccessor::AsyncReportWorkerFailure(
    const std::shared_ptr<WorkerTableData> &data_ptr, const StatusCallback &callback) {
  const WorkerID failed_worker =
      WorkerID::FromBinary(data_ptr->worker_address().worker_id());

  WorkerTable::WriteCallback write_done = nullptr;
  if (callback != nullptr) {
    write_done = [callback](RedisGcsClient *, const WorkerID &,
                            const WorkerTableData &) { callback(Status::OK()); };
  }

  return client_impl_->worker_table().Add(JobID::Nil(), failed_worker, data_ptr,
                                          write_done);
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisWorkerInfoAccessor::AsyncGet(
    const WorkerID &worker_id,
    const OptionalItemCallback<rpc::WorkerTableData> &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisWorkerInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::WorkerTableData> &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisWorkerInfoAccessor::AsyncAdd(
    const std::shared_ptr<rpc::WorkerTableData> &data_ptr,
    const StatusCallback &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// The specification is not used.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisPlacementGroupInfoAccessor::AsyncCreatePlacementGroup(
    const PlacementGroupSpecification &placement_group_spec) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisPlacementGroupInfoAccessor::AsyncRemovePlacementGroup(
    const PlacementGroupID &placement_group_id, const StatusCallback &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisPlacementGroupInfoAccessor::AsyncGet(
    const PlacementGroupID &placement_group_id,
    const OptionalItemCallback<rpc::PlacementGroupTableData> &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisPlacementGroupInfoAccessor::AsyncGetAll(
    const MultiItemCallback<rpc::PlacementGroupTableData> &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
/// Unsupported operation on the Redis-backed accessor.
///
/// Neither argument is used and `callback` is never invoked.
///
/// \return Always Status::Invalid("Not implemented").
Status RedisPlacementGroupInfoAccessor::AsyncWaitUntilReady(
    const PlacementGroupID &placement_group_id, const StatusCallback &callback) {
  Status not_supported = Status::Invalid("Not implemented");
  return not_supported;
}
} // namespace gcs
} // namespace ray
-491
View File
@@ -1,491 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "ray/common/id.h"
#include "ray/common/task/task_spec.h"
#include "ray/gcs/accessor.h"
#include "ray/gcs/callback.h"
#include "ray/gcs/subscription_executor.h"
#include "ray/gcs/tables.h"
namespace ray {
namespace gcs {
class RedisGcsClient;
/// \class RedisLogBasedActorInfoAccessor
/// An `ActorInfoAccessor` implementation that uses Redis as the backend
/// storage. AsyncGetAll() and AsyncGetByName() are not supported and report
/// Status::NotImplemented.
class RedisLogBasedActorInfoAccessor : public ActorInfoAccessor {
 public:
  explicit RedisLogBasedActorInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisLogBasedActorInfoAccessor() = default;

  Status GetAll(std::vector<ActorTableData> *actor_table_data_list) override;

  Status AsyncGet(const ActorID &actor_id,
                  const OptionalItemCallback<ActorTableData> &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAll(const MultiItemCallback<rpc::ActorTableData> &callback) override {
    return Status::NotImplemented(
        "RedisLogBasedActorInfoAccessor does not support AsyncGetAll.");
  }

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetByName(const std::string &name,
                        const OptionalItemCallback<ActorTableData> &callback) override {
    return Status::NotImplemented(
        "RedisLogBasedActorInfoAccessor does not support named detached actors.");
  }

  Status AsyncRegisterActor(const TaskSpecification &task_spec,
                            const StatusCallback &callback) override;

  Status AsyncCreateActor(const TaskSpecification &task_spec,
                          const StatusCallback &callback) override;

  Status AsyncSubscribeAll(const SubscribeCallback<ActorID, ActorTableData> &subscribe,
                           const StatusCallback &done) override;

  Status AsyncSubscribe(const ActorID &actor_id,
                        const SubscribeCallback<ActorID, ActorTableData> &subscribe,
                        const StatusCallback &done) override;

  Status AsyncUnsubscribe(const ActorID &actor_id) override;

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

  /// Always reports false for this accessor.
  bool IsActorUnsubscribed(const ActorID &actor_id) override { return false; }

 protected:
  virtual std::vector<ActorID> GetAllActorID() const;

  virtual Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const;

  RedisGcsClient *client_impl_ = nullptr;

  // A random NodeID is used for actor subscriptions. With NodeID::Nil, GCS
  // would still send every actor's updates to this GCS client; irrelevant
  // updates could be filtered out, but that adds overhead. Since the new GCS
  // client no longer holds the local NodeID, a random NodeID is used instead.
  // TODO(micafan): Remove this random id, once GCS becomes a service.
  NodeID subscribe_id_{NodeID::FromRandom()};

 private:
  using ActorSubscriptionExecutor =
      SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>;

  ActorSubscriptionExecutor log_based_actor_sub_executor_;
};
/// \class RedisActorInfoAccessor
/// An `ActorInfoAccessor` implementation that uses Redis as the backend
/// storage. It derives from `RedisLogBasedActorInfoAccessor` and overrides the
/// lookup and subscription entry points, using an executor bound to
/// `ActorTable` instead of `LogBasedActorTable`.
class RedisActorInfoAccessor : public RedisLogBasedActorInfoAccessor {
 public:
  explicit RedisActorInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisActorInfoAccessor() = default;

  Status AsyncGet(const ActorID &actor_id,
                  const OptionalItemCallback<ActorTableData> &callback) override;

  Status AsyncGetAll(const MultiItemCallback<rpc::ActorTableData> &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetByName(const std::string &name,
                        const OptionalItemCallback<ActorTableData> &callback) override {
    return Status::NotImplemented(
        "RedisActorInfoAccessor does not support named detached actors.");
  }

  Status AsyncSubscribeAll(const SubscribeCallback<ActorID, ActorTableData> &subscribe,
                           const StatusCallback &done) override;

  Status AsyncSubscribe(const ActorID &actor_id,
                        const SubscribeCallback<ActorID, ActorTableData> &subscribe,
                        const StatusCallback &done) override;

  Status AsyncUnsubscribe(const ActorID &actor_id) override;

 protected:
  std::vector<ActorID> GetAllActorID() const override;

  Status Get(const ActorID &actor_id, ActorTableData *actor_table_data) const override;

 private:
  using ActorSubscriptionExecutor =
      SubscriptionExecutor<ActorID, ActorTableData, ActorTable>;

  ActorSubscriptionExecutor actor_sub_executor_;
};
/// \class RedisJobInfoAccessor
/// A `JobInfoAccessor` implementation that uses Redis as the backend storage.
/// AsyncGetAll() is not supported and reports Status::NotImplemented.
class RedisJobInfoAccessor : public JobInfoAccessor {
 public:
  explicit RedisJobInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisJobInfoAccessor() = default;

  Status AsyncAdd(const std::shared_ptr<JobTableData> &data_ptr,
                  const StatusCallback &callback) override;

  Status AsyncMarkFinished(const JobID &job_id, const StatusCallback &callback) override;

  Status AsyncSubscribeAll(const SubscribeCallback<JobID, JobTableData> &subscribe,
                           const StatusCallback &done) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAll(const MultiItemCallback<rpc::JobTableData> &callback) override {
    return Status::NotImplemented("AsyncGetAll not implemented");
  }

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

 private:
  /// Append job information to GCS asynchronously.
  ///
  /// \param data_ptr The job information to append to GCS.
  /// \param callback Callback invoked once the append is done.
  /// \return Status
  Status DoAsyncAppend(const std::shared_ptr<JobTableData> &data_ptr,
                       const StatusCallback &callback);

  RedisGcsClient *client_impl_ = nullptr;

  using JobSubscriptionExecutor = SubscriptionExecutor<JobID, JobTableData, JobTable>;

  JobSubscriptionExecutor job_sub_executor_;
};
/// \class RedisTaskInfoAccessor
/// A `TaskInfoAccessor` implementation that uses Redis as the backend storage.
/// It covers task records, task leases and task reconstruction entries.
class RedisTaskInfoAccessor : public TaskInfoAccessor {
 public:
  explicit RedisTaskInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisTaskInfoAccessor() = default;

  Status AsyncAdd(const std::shared_ptr<TaskTableData> &data_ptr,
                  const StatusCallback &callback) override;

  Status AsyncGet(const TaskID &task_id,
                  const OptionalItemCallback<TaskTableData> &callback) override;

  Status AsyncSubscribe(const TaskID &task_id,
                        const SubscribeCallback<TaskID, TaskTableData> &subscribe,
                        const StatusCallback &done) override;

  Status AsyncUnsubscribe(const TaskID &task_id) override;

  Status AsyncAddTaskLease(const std::shared_ptr<TaskLeaseData> &data_ptr,
                           const StatusCallback &callback) override;

  Status AsyncGetTaskLease(const TaskID &task_id,
                           const OptionalItemCallback<TaskLeaseData> &callback) override;

  Status AsyncSubscribeTaskLease(
      const TaskID &task_id,
      const SubscribeCallback<TaskID, boost::optional<TaskLeaseData>> &subscribe,
      const StatusCallback &done) override;

  Status AsyncUnsubscribeTaskLease(const TaskID &task_id) override;

  Status AttemptTaskReconstruction(
      const std::shared_ptr<TaskReconstructionData> &data_ptr,
      const StatusCallback &callback) override;

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

  /// Always reports false for this accessor.
  bool IsTaskUnsubscribed(const TaskID &task_id) override { return false; }

  /// Always reports false for this accessor.
  bool IsTaskLeaseUnsubscribed(const TaskID &task_id) override { return false; }

 private:
  RedisGcsClient *client_impl_ = nullptr;

  // A random NodeID is used for task subscriptions. With NodeID::Nil, GCS
  // would still send every task's updates to this GCS client; irrelevant
  // updates could be filtered out, but that adds overhead. Since the new GCS
  // client no longer holds the local NodeID, a random NodeID is used instead.
  // TODO(micafan): Remove this random id, once GCS becomes a service.
  NodeID subscribe_id_{NodeID::FromRandom()};

  using TaskSubscriptionExecutor =
      SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>;

  TaskSubscriptionExecutor task_sub_executor_;

  using TaskLeaseSubscriptionExecutor =
      SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>, TaskLeaseTable>;

  TaskLeaseSubscriptionExecutor task_lease_sub_executor_;
};
/// \class RedisObjectInfoAccessor
/// An `ObjectInfoAccessor` implementation that uses Redis as the backend
/// storage. AsyncGetAll() and AsyncAddSpilledUrl() are not supported and
/// report Status::NotImplemented.
class RedisObjectInfoAccessor : public ObjectInfoAccessor {
 public:
  explicit RedisObjectInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisObjectInfoAccessor() = default;

  Status AsyncGetLocations(
      const ObjectID &object_id,
      const OptionalItemCallback<rpc::ObjectLocationInfo> &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAll(
      const MultiItemCallback<rpc::ObjectLocationInfo> &callback) override {
    return Status::NotImplemented("AsyncGetAll not implemented");
  }

  Status AsyncAddLocation(const ObjectID &object_id, const NodeID &node_id,
                          const StatusCallback &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncAddSpilledUrl(const ObjectID &object_id, const std::string &spilled_url,
                            const StatusCallback &callback) override {
    return Status::NotImplemented("AsyncAddSpilledUrl not implemented");
  }

  Status AsyncRemoveLocation(const ObjectID &object_id, const NodeID &node_id,
                             const StatusCallback &callback) override;

  Status AsyncSubscribeToLocations(
      const ObjectID &object_id,
      const SubscribeCallback<ObjectID, std::vector<rpc::ObjectLocationChange>>
          &subscribe,
      const StatusCallback &done) override;

  Status AsyncUnsubscribeToLocations(const ObjectID &object_id) override;

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

  /// Always reports false for this accessor.
  bool IsObjectUnsubscribed(const ObjectID &object_id) override { return false; }

 private:
  RedisGcsClient *client_impl_ = nullptr;

  // A random NodeID is used for object subscriptions. With NodeID::Nil, GCS
  // would still send every object's updates to this GCS client; irrelevant
  // updates could be filtered out, but that adds overhead. Since the new GCS
  // client no longer holds the local NodeID, a random NodeID is used instead.
  // TODO(micafan): Remove this random id, once GCS becomes a service.
  NodeID subscribe_id_{NodeID::FromRandom()};

  using ObjectSubscriptionExecutor =
      SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>;

  ObjectSubscriptionExecutor object_sub_executor_;
};
/// \class RedisNodeInfoAccessor
/// A `NodeInfoAccessor` implementation that uses Redis as the backend storage.
/// AsyncGetAllResourceUsage(), AsyncSetInternalConfig() and
/// AsyncGetInternalConfig() are not supported and report
/// Status::NotImplemented.
class RedisNodeInfoAccessor : public NodeInfoAccessor {
 public:
  explicit RedisNodeInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisNodeInfoAccessor() {}

  Status RegisterSelf(const GcsNodeInfo &local_node_info,
                      const StatusCallback &callback) override;

  Status UnregisterSelf() override;

  const NodeID &GetSelfId() const override;

  const GcsNodeInfo &GetSelfInfo() const override;

  Status AsyncRegister(const GcsNodeInfo &node_info,
                       const StatusCallback &callback) override;

  Status AsyncUnregister(const NodeID &node_id, const StatusCallback &callback) override;

  Status AsyncGetAll(const MultiItemCallback<GcsNodeInfo> &callback) override;

  Status AsyncSubscribeToNodeChange(
      const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
      const StatusCallback &done) override;

  boost::optional<GcsNodeInfo> Get(const NodeID &node_id,
                                   bool filter_dead_nodes = true) const override;

  const std::unordered_map<NodeID, GcsNodeInfo> &GetAll() const override;

  bool IsRemoved(const NodeID &node_id) const override;

  Status AsyncReportHeartbeat(const std::shared_ptr<HeartbeatTableData> &data_ptr,
                              const StatusCallback &callback) override;

  Status AsyncReportResourceUsage(const std::shared_ptr<rpc::ResourcesData> &data_ptr,
                                  const StatusCallback &callback) override;

  void AsyncReReportResourceUsage() override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAllResourceUsage(
      const ItemCallback<rpc::ResourceUsageBatchData> &callback) override {
    return Status::NotImplemented("AsyncGetAllResourceUsage not implemented");
  }

  Status AsyncSubscribeBatchedResourceUsage(
      const ItemCallback<ResourceUsageBatchData> &subscribe,
      const StatusCallback &done) override;

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

  /// Not supported by this accessor; `config` is left untouched.
  Status AsyncSetInternalConfig(
      std::unordered_map<std::string, std::string> &config) override {
    // The message previously misspelled the operation as "SetInternaConfig".
    return Status::NotImplemented("SetInternalConfig not implemented.");
  }

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetInternalConfig(
      const OptionalItemCallback<std::unordered_map<std::string, std::string>> &callback)
      override {
    return Status::NotImplemented("GetInternalConfig not implemented.");
  }

 private:
  RedisGcsClient *client_impl_{nullptr};

  // The alias name mentions heartbeats, but it is instantiated with the
  // resource usage batch data and table types.
  typedef SubscriptionExecutor<NodeID, ResourceUsageBatchData, ResourceUsageBatchTable>
      HeartbeatBatchSubscriptionExecutor;

  HeartbeatBatchSubscriptionExecutor resource_usage_batch_sub_executor_;
};
/// \class RedisNodeResourceInfoAccessor
/// A `NodeResourceInfoAccessor` implementation that uses Redis as the backend
/// storage. AsyncGetAllAvailableResources() is not supported and reports
/// Status::NotImplemented.
class RedisNodeResourceInfoAccessor : public NodeResourceInfoAccessor {
 public:
  explicit RedisNodeResourceInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisNodeResourceInfoAccessor() = default;

  Status AsyncGetResources(const NodeID &node_id,
                           const OptionalItemCallback<ResourceMap> &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAllAvailableResources(
      const MultiItemCallback<rpc::AvailableResources> &callback) override {
    return Status::NotImplemented("AsyncGetAllAvailableResources not implemented");
  }

  Status AsyncUpdateResources(const NodeID &node_id, const ResourceMap &resources,
                              const StatusCallback &callback) override;

  Status AsyncDeleteResources(const NodeID &node_id,
                              const std::vector<std::string> &resource_names,
                              const StatusCallback &callback) override;

  Status AsyncSubscribeToResources(const ItemCallback<rpc::NodeResourceChange> &subscribe,
                                   const StatusCallback &done) override;

  /// No-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

 private:
  // Keep this member declared before `resource_sub_executor_`; members are
  // initialized in declaration order.
  RedisGcsClient *client_impl_ = nullptr;

  using DynamicResourceSubscriptionExecutor =
      SubscriptionExecutor<NodeID, ResourceChangeNotification, DynamicResourceTable>;

  DynamicResourceSubscriptionExecutor resource_sub_executor_;
};
/// \class RedisErrorInfoAccessor
/// An `ErrorInfoAccessor` implementation that uses Redis as the backend
/// storage. The class declares no data members.
class RedisErrorInfoAccessor : public ErrorInfoAccessor {
 public:
  explicit RedisErrorInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisErrorInfoAccessor() = default;

  Status AsyncReportJobError(const std::shared_ptr<ErrorTableData> &data_ptr,
                             const StatusCallback &callback) override;
};
/// \class RedisStatsInfoAccessor
/// A `StatsInfoAccessor` implementation that uses Redis as the backend
/// storage. AsyncGetAll() is not supported and reports Status::NotImplemented.
class RedisStatsInfoAccessor : public StatsInfoAccessor {
 public:
  explicit RedisStatsInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisStatsInfoAccessor() = default;

  Status AsyncAddProfileData(const std::shared_ptr<ProfileTableData> &data_ptr,
                             const StatusCallback &callback) override;

  /// Not supported by this accessor; `callback` is never invoked.
  Status AsyncGetAll(const MultiItemCallback<rpc::ProfileTableData> &callback) override {
    return Status::NotImplemented("AsyncGetAll not implemented");
  }

 private:
  RedisGcsClient *client_impl_ = nullptr;
};
/// \class RedisWorkerInfoAccessor
/// Redis-backed implementation of the `WorkerInfoAccessor` interface.
class RedisWorkerInfoAccessor : public WorkerInfoAccessor {
 public:
  explicit RedisWorkerInfoAccessor(RedisGcsClient *client_impl);

  virtual ~RedisWorkerInfoAccessor() = default;

  Status AsyncSubscribeToWorkerFailures(
      const SubscribeCallback<WorkerID, WorkerTableData> &subscribe,
      const StatusCallback &done) override;

  Status AsyncReportWorkerFailure(const std::shared_ptr<WorkerTableData> &data_ptr,
                                  const StatusCallback &callback) override;

  Status AsyncGet(const WorkerID &worker_id,
                  const OptionalItemCallback<rpc::WorkerTableData> &callback) override;

  Status AsyncGetAll(const MultiItemCallback<rpc::WorkerTableData> &callback) override;

  Status AsyncAdd(const std::shared_ptr<rpc::WorkerTableData> &data_ptr,
                  const StatusCallback &callback) override;

  /// Intentionally a no-op for this accessor.
  void AsyncResubscribe(bool is_pubsub_server_restarted) override {}

 private:
  /// Non-owning pointer to the client this accessor operates on.
  RedisGcsClient *client_impl_{nullptr};

  /// Executor type used for worker-failure subscriptions on the worker table.
  using WorkerFailureSubscriptionExecutor =
      SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>;
  WorkerFailureSubscriptionExecutor worker_failure_sub_executor_;
};
/// \class RedisPlacementGroupInfoAccessor
/// RedisPlacementGroupInfoAccessor is the `PlacementGroupInfoAccessor`
/// counterpart of the other Redis accessors in this header.
/// Unlike its siblings it declares no constructor taking a `RedisGcsClient *`
/// and holds no state of its own.
class RedisPlacementGroupInfoAccessor : public PlacementGroupInfoAccessor {
public:
virtual ~RedisPlacementGroupInfoAccessor() = default;
/// Create a placement group from the given specification.
Status AsyncCreatePlacementGroup(
const PlacementGroupSpecification &placement_group_spec) override;
/// Remove the placement group identified by `placement_group_id`.
Status AsyncRemovePlacementGroup(const PlacementGroupID &placement_group_id,
const StatusCallback &callback) override;
/// Look up a single placement group.
Status AsyncGet(
const PlacementGroupID &placement_group_id,
const OptionalItemCallback<rpc::PlacementGroupTableData> &callback) override;
/// Look up all placement groups.
Status AsyncGetAll(
const MultiItemCallback<rpc::PlacementGroupTableData> &callback) override;
/// Wait until the placement group identified by `placement_group_id` is ready.
Status AsyncWaitUntilReady(const PlacementGroupID &placement_group_id,
const StatusCallback &callback) override;
};
} // namespace gcs
} // namespace ray
-144
View File
@@ -1,144 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ray/gcs/redis_gcs_client.h"
#include "ray/common/ray_config.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_context.h"
namespace ray {
namespace gcs {
/// Convenience constructor: delegates to the two-argument constructor with
/// `CommandType::kRegular`.
RedisGcsClient::RedisGcsClient(const GcsClientOptions &options)
: RedisGcsClient(options, CommandType::kRegular) {}
/// Builds the client for the given command type. Only the underlying
/// RedisClient object is created here; no connection is made until Connect().
RedisGcsClient::RedisGcsClient(const GcsClientOptions &options, CommandType command_type)
    : GcsClient(options), command_type_(command_type) {
  // Translate the GCS-level options into the options RedisClient expects.
  RedisClientOptions client_options(options.server_ip_, options.server_port_,
                                    options.password_, options.is_test_client_);
  redis_client_ = std::make_shared<RedisClient>(client_options);
}
/// Connects the underlying RedisClient and, on success, creates every table
/// wrapper and every Redis*InfoAccessor owned by this client.
///
/// \param io_service Event loop handed to `RedisClient::Connect`.
/// \return The RedisClient connect status on failure (nothing else is
/// initialised in that case), otherwise `Status::OK()`.
Status RedisGcsClient::Connect(boost::asio::io_service &io_service) {
// Calling Connect() on an already connected client trips this check.
RAY_CHECK(!is_connected_);
Status status = redis_client_->Connect(io_service);
if (!status.ok()) {
RAY_LOG(INFO) << "RedisGcsClient::Connect failed, status " << status.ToString();
return status;
}
std::shared_ptr<RedisContext> primary_context = redis_client_->GetPrimaryContext();
std::vector<std::shared_ptr<RedisContext>> shard_contexts =
redis_client_->GetShardContexts();
// Tables below are bound to the primary context only.
log_based_actor_table_.reset(new LogBasedActorTable({primary_context}, this));
actor_table_.reset(new ActorTable({primary_context}, this));
// TODO(micafan) Modify NodeTable's constructor (remove NodeID) in future.
// NOTE(review): the next sentence originally read "We will use NodeID instead of
// NodeID", which looks like residue of an identifier rename — confirm the intent.
// For worker/driver, it might not have this field (NodeID).
// For raylet, NodeID should be initialized in the raylet layer (not here).
node_table_.reset(new NodeTable({primary_context}, this));
job_table_.reset(new JobTable({primary_context}, this));
resource_usage_batch_table_.reset(new ResourceUsageBatchTable({primary_context}, this));
// Tables below would be sharded.
// NOTE(review): `resource_table_` further down is still created on the primary
// context even though it sits under this comment.
object_table_.reset(new ObjectTable(shard_contexts, this));
raylet_task_table_.reset(new raylet::TaskTable(shard_contexts, this, command_type_));
task_reconstruction_log_.reset(new TaskReconstructionLog(shard_contexts, this));
task_lease_table_.reset(new TaskLeaseTable(shard_contexts, this));
heartbeat_table_.reset(new HeartbeatTable(shard_contexts, this));
profile_table_.reset(new ProfileTable(shard_contexts, this));
resource_table_.reset(new DynamicResourceTable({primary_context}, this));
worker_table_.reset(new WorkerTable(shard_contexts, this));
// Accessors are created after the tables above; all but the placement group
// accessor receive `this`.
actor_accessor_.reset(new RedisActorInfoAccessor(this));
job_accessor_.reset(new RedisJobInfoAccessor(this));
object_accessor_.reset(new RedisObjectInfoAccessor(this));
node_accessor_.reset(new RedisNodeInfoAccessor(this));
node_resource_accessor_.reset(new RedisNodeResourceInfoAccessor(this));
task_accessor_.reset(new RedisTaskInfoAccessor(this));
error_accessor_.reset(new RedisErrorInfoAccessor(this));
stats_accessor_.reset(new RedisStatsInfoAccessor(this));
worker_accessor_.reset(new RedisWorkerInfoAccessor(this));
placement_group_accessor_.reset(new RedisPlacementGroupInfoAccessor());
is_connected_ = true;
RAY_LOG(DEBUG) << "RedisGcsClient connected.";
return Status::OK();
}
/// Disconnects the underlying RedisClient.
/// Must only be called on a connected client (checked below). The table and
/// accessor objects created by Connect() are not reset here.
void RedisGcsClient::Disconnect() {
RAY_CHECK(is_connected_);
// The flag is cleared before the Redis client is actually disconnected.
is_connected_ = false;
redis_client_->Disconnect();
RAY_LOG(DEBUG) << "RedisGcsClient Disconnected.";
}
/// Produces a multi-line summary made of the DebugString() of a subset of the
/// tables. The table pointers are dereferenced unconditionally, so they must
/// already have been created by Connect().
std::string RedisGcsClient::DebugString() const {
  std::ostringstream out;
  out << "RedisGcsClient:";
  out << "\n- TaskTable: " << raylet_task_table_->DebugString();
  out << "\n- LogBasedActorTable: " << log_based_actor_table_->DebugString();
  out << "\n- ActorTable: " << actor_table_->DebugString();
  out << "\n- TaskReconstructionLog: " << task_reconstruction_log_->DebugString();
  out << "\n- TaskLeaseTable: " << task_lease_table_->DebugString();
  out << "\n- HeartbeatTable: " << heartbeat_table_->DebugString();
  out << "\n- ProfileTable: " << profile_table_->DebugString();
  out << "\n- NodeTable: " << node_table_->DebugString();
  out << "\n- JobTable: " << job_table_->DebugString();
  return out.str();
}
// Table accessors. Each one returns a reference obtained by dereferencing the
// corresponding unique_ptr, which is only populated inside Connect(); calling
// any of them before a successful Connect() dereferences a null pointer.
ObjectTable &RedisGcsClient::object_table() { return *object_table_; }
raylet::TaskTable &RedisGcsClient::raylet_task_table() { return *raylet_task_table_; }
LogBasedActorTable &RedisGcsClient::log_based_actor_table() {
return *log_based_actor_table_;
}
ActorTable &RedisGcsClient::actor_table() { return *actor_table_; }
WorkerTable &RedisGcsClient::worker_table() { return *worker_table_; }
TaskReconstructionLog &RedisGcsClient::task_reconstruction_log() {
return *task_reconstruction_log_;
}
TaskLeaseTable &RedisGcsClient::task_lease_table() { return *task_lease_table_; }
NodeTable &RedisGcsClient::node_table() { return *node_table_; }
HeartbeatTable &RedisGcsClient::heartbeat_table() { return *heartbeat_table_; }
ResourceUsageBatchTable &RedisGcsClient::resource_usage_batch_table() {
return *resource_usage_batch_table_;
}
JobTable &RedisGcsClient::job_table() { return *job_table_; }
ProfileTable &RedisGcsClient::profile_table() { return *profile_table_; }
DynamicResourceTable &RedisGcsClient::resource_table() { return *resource_table_; }
} // namespace gcs
} // namespace ray
-131
View File
@@ -1,131 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include "ray/common/id.h"
#include "ray/common/status.h"
#include "ray/gcs/asio.h"
#include "ray/gcs/gcs_client.h"
#include "ray/gcs/redis_client.h"
#include "ray/gcs/tables.h"
#include "ray/util/logging.h"
namespace ray {
namespace gcs {
class RedisContext;
/// \class RedisGcsClient
/// GcsClient implementation that owns a RedisClient plus one wrapper object per
/// GCS table.
///
/// NOTE(review): this header uses std::function, std::vector, std::shared_ptr
/// and std::unique_ptr but directly includes only <map> and <string> from the
/// standard library; it appears to rely on transitive includes.
class RAY_EXPORT RedisGcsClient : public GcsClient {
public:
/// Constructor of RedisGcsClient.
/// Connect() must be called(and return ok) before you call any other methods.
/// TODO(micafan) To read and write from the GCS tables requires a further
/// call to Connect() to the client table. Will fix this in next pr.
///
/// NOTE(review): this single-argument constructor is not `explicit`, so a
/// GcsClientOptions converts implicitly to a RedisGcsClient — confirm that is
/// intended.
///
/// \param options Options of this client, e.g. server address, password and so on.
RedisGcsClient(const GcsClientOptions &options);
/// This constructor is only used for testing.
/// Connect() must be called(and return ok) before you call any other methods.
///
/// \param options Options of this client, e.g. server address, password and so on.
/// \param command_type The commands issued type.
RedisGcsClient(const GcsClientOptions &options, CommandType command_type);
/// Connect to GCS Service. Non-thread safe.
/// Call this function before calling other functions.
///
/// \param io_service The event loop for this client.
/// Must be single-threaded io_service (get more information from RedisAsioClient).
///
/// \return Status
Status Connect(boost::asio::io_service &io_service) override;
/// Disconnect with GCS Service. Non-thread safe.
void Disconnect() override;
/// Returns debug string for class.
///
/// \return string.
std::string DebugString() const override;
// We also need something to export generic code to run on workers from the
// driver (to set the PYTHONPATH)
using GetExportCallback = std::function<void(const std::string &data)>;
Status AddExport(const std::string &job_id, std::string &export_data);
Status GetExport(const std::string &job_id, int64_t export_index,
const GetExportCallback &done_callback);
/// Returns the shard contexts of the underlying RedisClient (by value).
std::vector<std::shared_ptr<RedisContext>> shard_contexts() {
return redis_client_->GetShardContexts();
}
/// Returns the primary context of the underlying RedisClient.
std::shared_ptr<RedisContext> primary_context() {
return redis_client_->GetPrimaryContext();
}
/// Returns the shared RedisClient owned by this object.
std::shared_ptr<RedisClient> GetRedisClient() const { return redis_client_; }
/// The following xxx_table methods implement the Accessor interfaces.
/// Implements the Actors() interface.
LogBasedActorTable &log_based_actor_table();
ActorTable &actor_table();
/// Implements the Jobs() interface.
JobTable &job_table();
/// Implements the Objects() interface.
ObjectTable &object_table();
/// Implements the Nodes() interface.
NodeTable &node_table();
HeartbeatTable &heartbeat_table();
ResourceUsageBatchTable &resource_usage_batch_table();
DynamicResourceTable &resource_table();
/// Implements the Tasks() interface.
virtual raylet::TaskTable &raylet_task_table();
TaskLeaseTable &task_lease_table();
TaskReconstructionLog &task_reconstruction_log();
/// Implements the Stats() interface.
ProfileTable &profile_table();
/// Implements the Workers() interface.
WorkerTable &worker_table();
private:
// GCS command type. If CommandType::kChain, chain-replicated versions of the tables
// might be used, if available.
CommandType command_type_{CommandType::kUnknown};
// Underlying Redis connection manager, created in the constructor.
std::shared_ptr<RedisClient> redis_client_;
// Table wrappers. All of them start out null and are created in Connect().
std::unique_ptr<ObjectTable> object_table_;
std::unique_ptr<raylet::TaskTable> raylet_task_table_;
std::unique_ptr<LogBasedActorTable> log_based_actor_table_;
std::unique_ptr<ActorTable> actor_table_;
std::unique_ptr<TaskReconstructionLog> task_reconstruction_log_;
std::unique_ptr<TaskLeaseTable> task_lease_table_;
std::unique_ptr<HeartbeatTable> heartbeat_table_;
std::unique_ptr<ResourceUsageBatchTable> resource_usage_batch_table_;
std::unique_ptr<ProfileTable> profile_table_;
std::unique_ptr<NodeTable> node_table_;
std::unique_ptr<DynamicResourceTable> resource_table_;
std::unique_ptr<WorkerTable> worker_table_;
std::unique_ptr<JobTable> job_table_;
};
} // namespace gcs
} // namespace ray
-215
View File
@@ -1,215 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ray/gcs/subscription_executor.h"
namespace ray {
namespace gcs {
/// Registers this executor with the table's pubsub channel and, optionally,
/// installs a callback that receives updates for every element.
///
/// \param node_id Forwarded to `Table::Subscribe`.
/// \param subscribe Callback for every update; may be null when the caller only
/// needs the registration itself (as `AsyncSubscribe` does).
/// \param done Invoked with `Status::OK()` once registration has completed;
/// invoked immediately if registration had already finished.
/// \return `Status::Invalid` on a duplicate subscription, the status of
/// `Table::Subscribe` when a registration is started, `Status::OK()` otherwise.
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribeAll(
    const NodeID &node_id, const SubscribeCallback<ID, Data> &subscribe,
    const StatusCallback &done) {
  // TODO(micafan) Optimize the lock when necessary.
  // Consider avoiding locking in single-threaded processes.
  std::unique_lock<std::mutex> lock(mutex_);

  if (subscribe_all_callback_ != nullptr) {
    RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to all elements.";
    return Status::Invalid("Duplicate subscription!");
  }

  if (registration_status_ != RegistrationStatus::kNotRegistered) {
    if (subscribe != nullptr) {
      RAY_LOG(DEBUG) << "Duplicate subscription! Already subscribed to specific elements"
                        ", can't subscribe to all elements.";
      return Status::Invalid("Duplicate subscription!");
    }
  }

  if (registration_status_ == RegistrationStatus::kRegistered) {
    // Already registered to GCS, just invoke the `done` callback.
    // The lock is released first so the callback may re-enter this executor.
    lock.unlock();
    if (done != nullptr) {
      done(Status::OK());
    }
    return Status::OK();
  }

  // Registration to GCS is not finished yet, add the `done` callback to the pending list
  // to be invoked when registration is done.
  if (done != nullptr) {
    pending_subscriptions_.emplace_back(done);
  }

  // If there's another registration request that's already on-going, then wait for it
  // to finish.
  if (registration_status_ == RegistrationStatus::kRegistering) {
    return Status::OK();
  }

  // Dispatches one notification to the per-id callback and/or the
  // subscribe-all callback. Callbacks are copied under the lock and invoked
  // outside of it.
  auto on_subscribe = [this](RedisGcsClient *client, const ID &id,
                             const std::vector<Data> &result) {
    if (result.empty()) {
      return;
    }

    SubscribeCallback<ID, Data> sub_one_callback = nullptr;
    SubscribeCallback<ID, Data> sub_all_callback = nullptr;
    {
      std::unique_lock<std::mutex> lock(mutex_);
      const auto it = id_to_callback_map_.find(id);
      if (it != id_to_callback_map_.end()) {
        sub_one_callback = it->second;
      }
      sub_all_callback = subscribe_all_callback_;
    }
    // Only the most recent entry of the notification is delivered.
    if (sub_one_callback != nullptr) {
      sub_one_callback(id, result.back());
    }
    if (sub_all_callback != nullptr) {
      RAY_CHECK(sub_one_callback == nullptr);
      sub_all_callback(id, result.back());
    }
  };

  // Marks registration as finished and flushes every pending `done` callback.
  auto on_done = [this](RedisGcsClient *client) {
    std::list<StatusCallback> pending_callbacks;
    {
      std::unique_lock<std::mutex> lock(mutex_);
      registration_status_ = RegistrationStatus::kRegistered;
      pending_callbacks.swap(pending_subscriptions_);
      RAY_CHECK(pending_subscriptions_.empty());
    }

    for (const auto &callback : pending_callbacks) {
      callback(Status::OK());
    }
  };

  Status status = table_.Subscribe(JobID::Nil(), node_id, on_subscribe, on_done);
  if (status.ok()) {
    registration_status_ = RegistrationStatus::kRegistering;
    subscribe_all_callback_ = subscribe;
  } else if (done != nullptr) {
    // The request failed and the failure is reported through the return value,
    // so `done` must not stay queued: otherwise it would fire after some later,
    // unrelated registration succeeds. The mutex has been held since the
    // emplace_back above, so the last element is the one queued by this call.
    pending_subscriptions_.pop_back();
  }
  return status;
}
/// Subscribes to updates of a single element.
/// The per-id callback is recorded first, then `AsyncSubscribeAll` is used (with
/// a null subscribe-all callback) to make sure the pubsub registration exists,
/// and only once that has completed are notifications requested for `id`.
///
/// \param node_id Must not be nil (checked below).
/// \param id The element to subscribe to.
/// \param subscribe Callback stored for `id`.
/// \param done Invoked with the status of the notification request.
/// \return `Status::Invalid` if `id` already has a callback, otherwise the
/// status returned by `AsyncSubscribeAll`.
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncSubscribe(
const NodeID &node_id, const ID &id, const SubscribeCallback<ID, Data> &subscribe,
const StatusCallback &done) {
RAY_CHECK(node_id != NodeID::Nil());
// NOTE(zhijunfu): `Subscribe` and other operations use different redis contexts,
// thus we need to call `RequestNotifications` in the Subscribe callback to ensure
// it's processed after the `Subscribe` request. Otherwise if `RequestNotifications`
// is processed first we will miss the initial notification.
// NOTE(review): the incoming `status` argument is overwritten below without
// being inspected.
auto on_subscribe_done = [this, node_id, id, subscribe, done](Status status) {
// Runs when the notification request completes; on failure the callback
// recorded for `id` is dropped again.
auto on_request_notification_done = [this, done, id](Status status) {
if (!status.ok()) {
std::unique_lock<std::mutex> lock(mutex_);
id_to_callback_map_.erase(id);
}
if (done != nullptr) {
done(status);
}
};
{
// NOTE(review): `mutex_` is held while calling into the table; this would
// deadlock if `RequestNotifications` ever ran its callback synchronously —
// presumably it does not, confirm.
std::unique_lock<std::mutex> lock(mutex_);
status = table_.RequestNotifications(JobID::Nil(), id, node_id,
on_request_notification_done);
if (!status.ok()) {
// NOTE(review): `done` is not invoked on this path; unless the table also
// runs the callback on failure, the caller never learns of it — confirm.
id_to_callback_map_.erase(id);
}
}
};
{
std::unique_lock<std::mutex> lock(mutex_);
const auto it = id_to_callback_map_.find(id);
if (it != id_to_callback_map_.end()) {
RAY_LOG(DEBUG) << "Duplicate subscription to id " << id << " node_id " << node_id;
return Status::Invalid("Duplicate subscription to element!");
}
// Record the callback before registering so no notification can be missed.
id_to_callback_map_[id] = subscribe;
}
auto status = AsyncSubscribeAll(node_id, nullptr, on_subscribe_done);
if (!status.ok()) {
// Roll back the callback recorded above.
std::unique_lock<std::mutex> lock(mutex_);
id_to_callback_map_.erase(id);
}
return status;
}
/// Cancels the subscription to a single element.
/// The callback stored for `id` is removed up front; if the cancel request
/// later reports a failure it is put back, unless the client has subscribed
/// again in the meantime.
///
/// \param node_id Forwarded to `Table::CancelNotifications`.
/// \param id The element to unsubscribe from.
/// \param done Invoked with the status of the cancel request.
/// \return `Status::Invalid` if `id` has no subscription, otherwise the status
/// returned by `Table::CancelNotifications`.
template <typename ID, typename Data, typename Table>
Status SubscriptionExecutor<ID, Data, Table>::AsyncUnsubscribe(
    const NodeID &node_id, const ID &id, const StatusCallback &done) {
  SubscribeCallback<ID, Data> removed_callback = nullptr;
  {
    std::unique_lock<std::mutex> guard(mutex_);
    const auto entry = id_to_callback_map_.find(id);
    if (entry == id_to_callback_map_.end()) {
      RAY_LOG(DEBUG) << "Invalid Unsubscribe! id " << id << " node_id " << node_id;
      return Status::Invalid("Invalid Unsubscribe, no existing subscription found.");
    }
    removed_callback = std::move(entry->second);
    id_to_callback_map_.erase(entry);
  }
  RAY_CHECK(removed_callback != nullptr);

  auto on_cancel_done = [this, id, removed_callback, done](Status status) {
    if (!status.ok()) {
      std::unique_lock<std::mutex> guard(mutex_);
      if (id_to_callback_map_.find(id) == id_to_callback_map_.end()) {
        // The Unsubscribe failed, so restore the initial callback.
        id_to_callback_map_[id] = removed_callback;
      } else {
        // The initial AsyncUnsubscribe deleted the callback, but the client
        // has subscribed again in the meantime. This new callback will be
        // called if we receive more notifications.
        RAY_LOG(WARNING)
            << "Client called AsyncSubscribe on " << id
            << " while AsyncUnsubscribe was pending, but the unsubscribe failed.";
      }
    }

    if (done != nullptr) {
      done(status);
    }
  };

  return table_.CancelNotifications(JobID::Nil(), id, node_id, on_cancel_done);
}
// Explicit instantiations. The member functions of SubscriptionExecutor are
// defined in this translation unit, so each (ID, Data, Table) combination
// listed here is instantiated explicitly.
template class SubscriptionExecutor<ActorID, ActorTableData, LogBasedActorTable>;
template class SubscriptionExecutor<ActorID, ActorTableData, ActorTable>;
template class SubscriptionExecutor<JobID, JobTableData, JobTable>;
template class SubscriptionExecutor<TaskID, TaskTableData, raylet::TaskTable>;
template class SubscriptionExecutor<ObjectID, ObjectChangeNotification, ObjectTable>;
template class SubscriptionExecutor<TaskID, boost::optional<TaskLeaseData>,
TaskLeaseTable>;
template class SubscriptionExecutor<NodeID, ResourceChangeNotification,
DynamicResourceTable>;
template class SubscriptionExecutor<NodeID, ResourceUsageBatchData,
ResourceUsageBatchTable>;
template class SubscriptionExecutor<WorkerID, WorkerTableData, WorkerTable>;
} // namespace gcs
} // namespace ray
-108
View File
@@ -1,108 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <atomic>
#include <list>
#include <mutex>
#include "ray/gcs/callback.h"
#include "ray/gcs/tables.h"
namespace ray {
namespace gcs {
/// \class SubscriptionExecutor
/// Wraps the subscribe/unsubscribe protocol of a GCS table (e.g. actors, tasks,
/// objects or nodes). A client can either subscribe to individual elements or
/// to all elements of the table.
template <typename ID, typename Data, typename Table>
class SubscriptionExecutor {
 public:
  explicit SubscriptionExecutor(Table &table) : table_(table) {}

  ~SubscriptionExecutor() = default;

  /// Subscribe to operations of all elements.
  /// Repeated subscription will return a failure.
  ///
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each update will be received. Else, only
  /// messages for the given node will be received.
  /// \param subscribe Callback that will be called each time when an element
  /// is registered or updated.
  /// \param done Callback that will be called when subscription is complete.
  /// \return Status
  Status AsyncSubscribeAll(const NodeID &node_id,
                           const SubscribeCallback<ID, Data> &subscribe,
                           const StatusCallback &done);

  /// Subscribe to operations of an element.
  /// Repeated subscription to an element will return a failure.
  ///
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each update will be received. Else, only
  /// messages for the given node will be received.
  /// \param id The id of the element to be subscribe to.
  /// \param subscribe Callback that will be called each time when the element
  /// is registered or updated.
  /// \param done Callback that will be called when subscription is complete.
  /// \return Status
  Status AsyncSubscribe(const NodeID &node_id, const ID &id,
                        const SubscribeCallback<ID, Data> &subscribe,
                        const StatusCallback &done);

  /// Cancel subscription to an element.
  /// Unsubscribing can only be called after the subscription request is completed.
  ///
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each update will be received. Else, only
  /// messages for the given node will be received.
  /// \param id The id of the element to be unsubscribed to.
  /// \param done Callback that will be called when cancel subscription is complete.
  /// \return Status
  Status AsyncUnsubscribe(const NodeID &node_id, const ID &id,
                          const StatusCallback &done);

 private:
  /// The table this executor subscribes on (not owned).
  Table &table_;

  /// Protects the mutable state below.
  std::mutex mutex_;

  /// Progress of the registration with the GCS.
  enum class RegistrationStatus : uint8_t {
    kNotRegistered,
    kRegistering,
    kRegistered,
  };

  /// Whether successfully registered subscription to GCS.
  RegistrationStatus registration_status_{RegistrationStatus::kNotRegistered};

  /// List of subscriptions before registration to GCS is done, these callbacks
  /// will be called when the registration to GCS finishes.
  std::list<StatusCallback> pending_subscriptions_;

  /// Subscribe Callback of all elements.
  SubscribeCallback<ID, Data> subscribe_all_callback_{nullptr};

  /// A mapping from element ID to subscription callback.
  using IDToCallbackMap = std::unordered_map<ID, SubscribeCallback<ID, Data>>;
  IDToCallbackMap id_to_callback_map_;
};
} // namespace gcs
} // namespace ray
-847
View File
@@ -1,847 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ray/gcs/tables.h"
#include "absl/time/clock.h"
#include "ray/common/common_protocol.h"
#include "ray/common/grpc_util.h"
#include "ray/common/ray_config.h"
#include "ray/gcs/redis_gcs_client.h"
extern "C" {
#include "hiredis/hiredis.h"
}
namespace {

// Redis module command names, in regular and chain-replicated flavours.
static const std::string kTableAppendCommand = "RAY.TABLE_APPEND";
static const std::string kChainTableAppendCommand = "RAY.CHAIN.TABLE_APPEND";
static const std::string kTableAddCommand = "RAY.TABLE_ADD";
static const std::string kChainTableAddCommand = "RAY.CHAIN.TABLE_ADD";

/// Returns the append command matching `command_type`.
/// Any type other than kRegular must be kChain (checked).
std::string GetLogAppendCommand(const ray::gcs::CommandType command_type) {
  if (command_type != ray::gcs::CommandType::kRegular) {
    RAY_CHECK(command_type == ray::gcs::CommandType::kChain);
    return kChainTableAppendCommand;
  }
  return kTableAppendCommand;
}

/// Returns the add command matching `command_type`.
/// Any type other than kRegular must be kChain (checked).
std::string GetTableAddCommand(const ray::gcs::CommandType command_type) {
  if (command_type != ray::gcs::CommandType::kRegular) {
    RAY_CHECK(command_type == ray::gcs::CommandType::kChain);
    return kChainTableAddCommand;
  }
  return kTableAddCommand;
}

}  // namespace
namespace ray {
namespace gcs {
/// Asynchronously appends `data` to the log stored under `id`.
/// A failed append is fatal (RAY_CHECK in the reply handler); on success `done`
/// is invoked if it was provided. `job_id` is not used by this function.
template <typename ID, typename Data>
Status Log<ID, Data>::Append(const JobID &job_id, const ID &id,
                             const std::shared_ptr<Data> &data,
                             const WriteCallback &done) {
  num_appends_++;

  std::string payload = data->SerializeAsString();

  // `data` is captured by value so it stays alive until the reply arrives.
  auto on_reply = [this, id, data, done](std::shared_ptr<CallbackReply> reply) {
    const auto status = reply->ReadAsStatus();
    // Failed to append the entry.
    RAY_CHECK(status.ok()) << "Failed to execute command TABLE_APPEND:"
                           << status.ToString();
    if (done == nullptr) {
      return;
    }
    (done)(client_, id, *data);
  };

  return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id,
                                       payload.data(), payload.length(), prefix_,
                                       pubsub_channel_, std::move(on_reply));
}
/// Synchronous variant of Append().
/// Returns the status read from the reply, or a RedisError status when no
/// reply object was produced. `job_id` is not used by this function.
template <typename ID, typename Data>
Status Log<ID, Data>::SyncAppend(const JobID &job_id, const ID &id,
                                 const std::shared_ptr<Data> &data) {
  num_appends_++;

  std::string payload = data->SerializeAsString();
  auto reply =
      GetRedisContext(id)->RunSync(GetLogAppendCommand(command_type_), id, payload.data(),
                                   payload.length(), prefix_, pubsub_channel_);
  if (!reply) {
    return Status::RedisError("Redis error");
  }
  return reply->ReadAsStatus();
}
/// Asynchronously appends `data` to the log stored under `id`, passing
/// `log_length` along with the command.
/// Unlike Append(), a failed reply is not fatal: `done` runs on success and
/// `failure` runs otherwise (each only if provided). `job_id` is unused here.
template <typename ID, typename Data>
Status Log<ID, Data>::AppendAt(const JobID &job_id, const ID &id,
                               const std::shared_ptr<Data> &data,
                               const WriteCallback &done, const WriteCallback &failure,
                               int log_length) {
  num_appends_++;

  std::string payload = data->SerializeAsString();

  auto on_reply = [this, id, data, done, failure](std::shared_ptr<CallbackReply> reply) {
    const auto status = reply->ReadAsStatus();
    // Pick the handler that matches the outcome, then invoke it if present.
    const WriteCallback &handler = status.ok() ? done : failure;
    if (handler != nullptr) {
      (handler)(client_, id, *data);
    }
  };

  return GetRedisContext(id)->RunAsync(GetLogAppendCommand(command_type_), id,
                                       payload.data(), payload.length(), prefix_,
                                       pubsub_channel_, std::move(on_reply), log_length);
}
/// Asynchronously fetches every entry stored under `id`.
/// `lookup` receives the parsed entries, or an empty vector when the reply is
/// nil. A null `lookup` makes the reply handler do nothing. `job_id` is unused.
template <typename ID, typename Data>
Status Log<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &lookup) {
  num_lookups_++;

  auto on_reply = [this, id, lookup](std::shared_ptr<CallbackReply> reply) {
    if (lookup == nullptr) {
      return;
    }

    std::vector<Data> entries;
    if (!reply->IsNil()) {
      GcsEntry parsed;
      parsed.ParseFromString(reply->ReadAsString());
      // The reply must belong to the id that was asked for.
      RAY_CHECK(ID::FromBinary(parsed.id()) == id);
      for (const auto &raw_entry : parsed.entries()) {
        Data item;
        item.ParseFromString(raw_entry);
        entries.emplace_back(std::move(item));
      }
    }
    lookup(client_, id, entries);
  };

  // The lookup command carries no payload.
  std::vector<uint8_t> nil;
  return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(),
                                       prefix_, pubsub_channel_, std::move(on_reply));
}
/// Subscribe overload for callers that do not care about the change mode.
/// Adapts `subscribe` to a NotificationCallback and forwards to the other
/// overload. REMOVE notifications are not expected here (checked), and
/// `subscribe` is invoked without a null check.
template <typename ID, typename Data>
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                const Callback &subscribe,
                                const SubscriptionCallback &done) {
  const NotificationCallback adapter = [subscribe](RedisGcsClient *client, const ID &id,
                                                   const GcsChangeMode change_mode,
                                                   const std::vector<Data> &data) {
    RAY_CHECK(change_mode != GcsChangeMode::REMOVE);
    subscribe(client, id, data);
  };
  return Subscribe(job_id, node_id, adapter, done);
}
/// Subscribes to this table's pubsub channel on every shard context.
/// May only be called once per table (checked). The reply handler treats an
/// empty payload as the acknowledgement of the subscription request (`done`),
/// and anything else as a notification to parse and hand to `subscribe`.
/// `job_id` is not used by this function.
template <typename ID, typename Data>
Status Log<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                const NotificationCallback &subscribe,
                                const SubscriptionCallback &done) {
  RAY_CHECK(subscribe_callback_index_ == -1)
      << "Client called Subscribe twice on the same table";

  auto on_message = [this, subscribe, done](std::shared_ptr<CallbackReply> reply) {
    const auto data = reply->ReadAsPubsubData();

    if (data.empty()) {
      // No notification data is provided. This is the callback for the
      // initial subscription request.
      if (done != nullptr) {
        done(client_);
      }
      return;
    }

    // Data is provided. This is the callback for a message.
    if (subscribe == nullptr) {
      return;
    }

    // Parse the notification.
    GcsEntry entry;
    entry.ParseFromString(data);
    ID id = ID::FromBinary(entry.id());
    std::vector<Data> results;
    for (const auto &raw_entry : entry.entries()) {
      Data result;
      result.ParseFromString(raw_entry);
      results.emplace_back(std::move(result));
    }
    subscribe(client_, id, entry.change_mode(), results);
  };

  subscribe_callback_index_ = 1;
  for (auto &shard : shard_contexts_) {
    RAY_RETURN_NOT_OK(shard->SubscribeAsync(node_id, pubsub_channel_, on_message,
                                            &subscribe_callback_index_));
  }
  return Status::OK();
}
/// Asks the server to send notifications about `id` to `node_id`.
/// Requires that Subscribe() was called first (checked). When `done` is
/// provided it receives OK for a nil reply and a RedisError status otherwise.
/// `job_id` is not used by this function.
template <typename ID, typename Data>
Status Log<ID, Data>::RequestNotifications(const JobID &job_id, const ID &id,
                                           const NodeID &node_id,
                                           const StatusCallback &done) {
  RAY_CHECK(subscribe_callback_index_ >= 0)
      << "Client requested notifications on a key before Subscribe completed";

  RedisCallback on_reply = nullptr;
  if (done != nullptr) {
    on_reply = [done](std::shared_ptr<CallbackReply> reply) {
      if (reply->IsNil()) {
        done(Status::OK());
      } else {
        done(Status::RedisError("request notifications failed."));
      }
    };
  }

  return GetRedisContext(id)->RunAsync("RAY.TABLE_REQUEST_NOTIFICATIONS", id,
                                       node_id.Data(), node_id.Size(), prefix_,
                                       pubsub_channel_, on_reply);
}
/// Asks the server to stop sending notifications about `id` to `node_id`.
/// Requires that Subscribe() was called first (checked). When `done` is
/// provided it receives the status read from the reply.
/// `job_id` is not used by this function.
template <typename ID, typename Data>
Status Log<ID, Data>::CancelNotifications(const JobID &job_id, const ID &id,
                                          const NodeID &node_id,
                                          const StatusCallback &done) {
  RAY_CHECK(subscribe_callback_index_ >= 0)
      << "Client canceled notifications on a key before Subscribe completed";

  RedisCallback on_reply = nullptr;
  if (done != nullptr) {
    on_reply = [done](std::shared_ptr<CallbackReply> reply) {
      const auto reply_status = reply->ReadAsStatus();
      done(reply_status);
    };
  }

  return GetRedisContext(id)->RunAsync("RAY.TABLE_CANCEL_NOTIFICATIONS", id,
                                       node_id.Data(), node_id.Size(), prefix_,
                                       pubsub_channel_, on_reply);
}
/// Deletes the entries stored under each of `ids`.
/// Ids are grouped by the Redis context (shard) that owns them, and each group
/// is sent as one or more `RAY.TABLE_DELETE` commands of at most
/// `maximum_gcs_deletion_batch_size` ids. Every command payload is a 16-bit id
/// count (native byte order) followed by the concatenated binary ids.
/// The commands are fire-and-forget: no callback is attached and their return
/// status is ignored. `job_id` is not used by this function.
///
/// \param job_id Unused.
/// \param ids The ids to delete; an empty vector is a no-op.
template <typename ID, typename Data>
void Log<ID, Data>::Delete(const JobID &job_id, const std::vector<ID> &ids) {
  if (ids.empty()) {
    return;
  }

  // Concatenate the binary ids per shard.
  std::unordered_map<RedisContext *, std::ostringstream> sharded_data;
  for (const auto &id : ids) {
    sharded_data[GetRedisContext(id).get()] << id.Binary();
  }

  // Breaking really large deletion commands into batches of smaller size.
  const size_t batch_size =
      RayConfig::instance().maximum_gcs_deletion_batch_size() * ID::Size();
  // A zero batch size would never advance the loop below.
  RAY_CHECK(batch_size > 0) << "maximum_gcs_deletion_batch_size must be positive";

  for (const auto &pair : sharded_data) {
    // Materialise the stream once; calling str() in the loop condition would copy
    // the whole buffer on every iteration.
    const std::string current_data = pair.second.str();
    for (size_t cur = 0; cur < current_data.size(); cur += batch_size) {
      const size_t data_field_size = std::min(batch_size, current_data.size() - cur);
      const uint16_t id_count = static_cast<uint16_t>(data_field_size / ID::Size());
      // Send data contains id count and all the id data.
      std::string send_data(data_field_size + sizeof(id_count), 0);
      uint8_t *buffer = reinterpret_cast<uint8_t *>(&send_data[0]);
      // Copy the count byte-wise rather than storing through a uint16_t pointer:
      // the string buffer carries no alignment guarantee for uint16_t.
      RAY_IGNORE_EXPR(std::copy_n(reinterpret_cast<const uint8_t *>(&id_count),
                                  sizeof(id_count), buffer));
      RAY_IGNORE_EXPR(
          std::copy_n(reinterpret_cast<const uint8_t *>(current_data.c_str() + cur),
                      data_field_size, buffer + sizeof(id_count)));

      RAY_IGNORE_EXPR(
          pair.first->RunAsync("RAY.TABLE_DELETE", UniqueID::Nil(),
                               reinterpret_cast<const uint8_t *>(send_data.c_str()),
                               send_data.size(), prefix_, pubsub_channel_,
                               /*redisCallback=*/nullptr));
    }
  }
}
/// Delete an entire key from redis.
///
/// Wraps the ID in a one-element vector and forwards to the batch overload.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data to delete from the GCS.
/// \return Void.
template <typename ID, typename Data>
void Log<ID, Data>::Delete(const JobID &job_id, const ID &id) {
  const std::vector<ID> single_id({id});
  Delete(job_id, single_id);
}
/// Returns debug string for class.
///
/// \return A string reporting the lookup and append counters of this log.
template <typename ID, typename Data>
std::string Log<ID, Data>::DebugString() const {
  std::stringstream stats;
  stats << "num lookups: " << num_lookups_;
  stats << ", num appends: " << num_appends_;
  return stats.str();
}
/// Add an entry to the table.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the data that is added to the GCS.
/// \param data Data that is serialized and sent to the GCS.
/// \param done Optional callback invoked with (client, id, data) when the
/// Redis reply arrives.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Table<ID, Data>::Add(const JobID &job_id, const ID &id,
                            const std::shared_ptr<Data> &data,
                            const WriteCallback &done) {
  ++num_adds_;
  std::string serialized = data->SerializeAsString();
  // The reply itself is not inspected; it only signals that the command ran.
  auto on_reply = [this, id, data, done](std::shared_ptr<CallbackReply> /*reply*/) {
    if (done == nullptr) {
      return;
    }
    done(client_, id, *data);
  };
  return GetRedisContext(id)->RunAsync(GetTableAddCommand(command_type_), id,
                                       serialized.data(), serialized.length(), prefix_,
                                       pubsub_channel_, std::move(on_reply));
}
/// Lookup an entry asynchronously.
///
/// Delegates to `Log::Lookup` and adapts its vector result: an empty vector is
/// reported through `failure`, a single element through `lookup`. More than one
/// element is a fatal error.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data that is looked up in the GCS.
/// \param lookup Optional callback called if there was data at the key.
/// \param failure Optional callback called if there was no data at the key.
/// \return Status
template <typename ID, typename Data>
Status Table<ID, Data>::Lookup(const JobID &job_id, const ID &id, const Callback &lookup,
                               const FailureCallback &failure) {
  ++num_lookups_;
  auto on_entries = [lookup, failure](RedisGcsClient *client, const ID &id,
                                      const std::vector<Data> &data) {
    if (!data.empty()) {
      RAY_CHECK(data.size() == 1);
      if (lookup != nullptr) {
        lookup(client, id, data[0]);
      }
      return;
    }
    if (failure != nullptr) {
      failure(client, id);
    }
  };
  return Log<ID, Data>::Lookup(job_id, id, on_entries);
}
/// Subscribe to this table, adapting `Log::Subscribe` notifications to
/// single-entry callbacks.
///
/// Each notification must carry zero or one entry (checked fatally). A single
/// entry is forwarded to `subscribe`; an empty notification is forwarded to
/// `failure`.
///
/// \param job_id The ID of the job.
/// \param node_id Forwarded unchanged to `Log::Subscribe`.
/// \param subscribe Optional callback called with the entry of each non-empty
/// notification.
/// \param failure Optional callback called for each empty notification.
/// \param done Callback forwarded to `Log::Subscribe`.
/// \return Status
template <typename ID, typename Data>
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                  const Callback &subscribe,
                                  const FailureCallback &failure,
                                  const SubscriptionCallback &done) {
  return Log<ID, Data>::Subscribe(
      job_id, node_id,
      [subscribe, failure](RedisGcsClient *client, const ID &id,
                           const std::vector<Data> &data) {
        RAY_CHECK(data.empty() || data.size() == 1);
        if (data.size() == 1) {
          // Guard against an empty callback, matching how `failure` is handled
          // here and how `lookup` is handled in Table::Lookup.
          if (subscribe != nullptr) {
            subscribe(client, id, data[0]);
          }
        } else if (failure != nullptr) {
          failure(client, id);
        }
      },
      done);
}
/// Subscribe to this table without a failure handler.
///
/// Forwards to the five-argument overload with a null failure callback.
///
/// \param job_id The ID of the job.
/// \param node_id Forwarded unchanged to the five-argument overload.
/// \param subscribe Callback called with the entry of each non-empty notification.
/// \param done Callback forwarded to the five-argument overload.
/// \return Status
template <typename ID, typename Data>
Status Table<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                  const Callback &subscribe,
                                  const SubscriptionCallback &done) {
  const FailureCallback no_failure_handler = nullptr;
  return Subscribe(job_id, node_id, subscribe, no_failure_handler, done);
}
/// Returns debug string for class.
///
/// \return A string reporting the lookup and add counters of this table.
template <typename ID, typename Data>
std::string Table<ID, Data>::DebugString() const {
  std::stringstream stats;
  stats << "num lookups: " << num_lookups_;
  stats << ", num adds: " << num_adds_;
  return stats.str();
}
/// Add an entry to the set stored at a key.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the key that the entry is added under.
/// \param data Data that is serialized and sent with `RAY.SET_ADD`.
/// \param done Optional callback invoked with (client, id, data) when the
/// Redis reply arrives.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Set<ID, Data>::Add(const JobID &job_id, const ID &id,
                          const std::shared_ptr<Data> &data, const WriteCallback &done) {
  ++num_adds_;
  std::string serialized = data->SerializeAsString();
  // The reply itself is not inspected; it only signals that the command ran.
  auto on_reply = [this, id, data, done](std::shared_ptr<CallbackReply> /*reply*/) {
    if (done == nullptr) {
      return;
    }
    done(client_, id, *data);
  };
  return GetRedisContext(id)->RunAsync("RAY.SET_ADD", id, serialized.data(),
                                       serialized.length(), prefix_, pubsub_channel_,
                                       std::move(on_reply));
}
/// Remove an entry from the set stored at a key.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the key that the entry is removed from.
/// \param data Data that is serialized and sent with `RAY.SET_REMOVE`.
/// \param done Optional callback invoked with (client, id, data) when the
/// Redis reply arrives.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Set<ID, Data>::Remove(const JobID &job_id, const ID &id,
                             const std::shared_ptr<Data> &data,
                             const WriteCallback &done) {
  ++num_removes_;
  std::string serialized = data->SerializeAsString();
  // The reply itself is not inspected; it only signals that the command ran.
  auto on_reply = [this, id, data, done](std::shared_ptr<CallbackReply> /*reply*/) {
    if (done == nullptr) {
      return;
    }
    done(client_, id, *data);
  };
  return GetRedisContext(id)->RunAsync("RAY.SET_REMOVE", id, serialized.data(),
                                       serialized.length(), prefix_, pubsub_channel_,
                                       std::move(on_reply));
}
/// Subscribe to changes of this set.
///
/// Adapts the change-mode-aware `Log::Subscribe` overload: every notification is
/// wrapped into a one-element vector of `ArrayNotification` before being handed
/// to `subscribe`.
///
/// \param job_id The ID of the job.
/// \param node_id Forwarded unchanged to `Log::Subscribe`.
/// \param subscribe Callback called with the wrapped notification.
/// \param done Callback forwarded to `Log::Subscribe`.
/// \return Status
template <typename ID, typename Data>
Status Set<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                const NotificationCallback &subscribe,
                                const SubscriptionCallback &done) {
  auto on_subscribe = [subscribe](RedisGcsClient *client, const ID &id,
                                  const GcsChangeMode change_mode,
                                  const std::vector<Data> &data) {
    std::vector<ArrayNotification<Data>> notification_vec;
    notification_vec.emplace_back(ArrayNotification<Data>(change_mode, data));
    subscribe(client, id, notification_vec);
  };
  return Log<ID, Data>::Subscribe(job_id, node_id, on_subscribe, done);
}
/// Returns debug string for class.
///
/// \return A string reporting the lookup, add and remove counters of this set.
template <typename ID, typename Data>
std::string Set<ID, Data>::DebugString() const {
  std::stringstream stats;
  stats << "num lookups: " << num_lookups_;
  stats << ", num adds: " << num_adds_;
  stats << ", num removes: " << num_removes_;
  return stats.str();
}
/// Add or update entries of the hash stored at a key.
///
/// The entries are packed into a `GcsEntry` with change mode `APPEND_OR_ADD` as
/// alternating key / serialized-value strings and sent with `RAY.HASH_UPDATE`.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the hash to update.
/// \param data_map Map from entry key to the value stored under that key.
/// \param done Optional callback invoked with (client, id, data_map) when the
/// Redis reply arrives.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Hash<ID, Data>::Update(const JobID &job_id, const ID &id, const DataMap &data_map,
                              const HashCallback &done) {
  ++num_adds_;
  GcsEntry gcs_entry;
  gcs_entry.set_id(id.Binary());
  gcs_entry.set_change_mode(GcsChangeMode::APPEND_OR_ADD);
  for (const auto &key_and_value : data_map) {
    gcs_entry.add_entries(key_and_value.first);
    gcs_entry.add_entries(key_and_value.second->SerializeAsString());
  }
  std::string payload = gcs_entry.SerializeAsString();
  // The reply itself is not inspected; it only signals that the command ran.
  auto on_reply = [this, id, data_map, done](std::shared_ptr<CallbackReply> /*reply*/) {
    if (done == nullptr) {
      return;
    }
    done(client_, id, data_map);
  };
  return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, payload.data(),
                                       payload.size(), prefix_, pubsub_channel_,
                                       std::move(on_reply));
}
/// Remove entries from the hash stored at a key.
///
/// The keys are packed into a `GcsEntry` with change mode `REMOVE` and sent with
/// `RAY.HASH_UPDATE`.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the hash to remove entries from.
/// \param keys The entry keys to remove.
/// \param remove_callback Optional callback invoked with (client, id, keys)
/// when the Redis reply arrives.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Hash<ID, Data>::RemoveEntries(const JobID &job_id, const ID &id,
                                     const std::vector<std::string> &keys,
                                     const HashRemoveCallback &remove_callback) {
  ++num_removes_;
  GcsEntry gcs_entry;
  gcs_entry.set_id(id.Binary());
  gcs_entry.set_change_mode(GcsChangeMode::REMOVE);
  for (const auto &entry_key : keys) {
    gcs_entry.add_entries(entry_key);
  }
  std::string payload = gcs_entry.SerializeAsString();
  // The reply itself is not inspected; it only signals that the command ran.
  auto on_reply = [this, id, keys,
                   remove_callback](std::shared_ptr<CallbackReply> /*reply*/) {
    if (remove_callback == nullptr) {
      return;
    }
    remove_callback(client_, id, keys);
  };
  return GetRedisContext(id)->RunAsync("RAY.HASH_UPDATE", id, payload.data(),
                                       payload.size(), prefix_, pubsub_channel_,
                                       std::move(on_reply));
}
/// Returns debug string for class.
///
/// \return A string reporting the lookup, add and remove counters of this hash.
template <typename ID, typename Data>
std::string Hash<ID, Data>::DebugString() const {
  std::stringstream stats;
  stats << "num lookups: " << num_lookups_;
  stats << ", num adds: " << num_adds_;
  stats << ", num removes: " << num_removes_;
  return stats.str();
}
/// Lookup all entries of the hash stored at a key asynchronously.
///
/// The reply, when not nil, is parsed as a `GcsEntry` whose entries alternate
/// between key and serialized value. A nil reply yields an empty map.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param id The ID of the hash to look up.
/// \param lookup Optional callback invoked with (client, id, entries). When it
/// is null the reply is ignored.
/// \return Status returned by issuing the asynchronous Redis command.
template <typename ID, typename Data>
Status Hash<ID, Data>::Lookup(const JobID &job_id, const ID &id,
                              const HashCallback &lookup) {
  num_lookups_++;
  auto callback = [this, id, lookup](std::shared_ptr<CallbackReply> reply) {
    if (lookup != nullptr) {
      DataMap results;
      if (!reply->IsNil()) {
        // Read the reply once and parse it directly; a separate unused copy of
        // the string was previously made before parsing.
        GcsEntry gcs_entry;
        gcs_entry.ParseFromString(reply->ReadAsString());
        RAY_CHECK(ID::FromBinary(gcs_entry.id()) == id);
        RAY_CHECK(gcs_entry.entries_size() % 2 == 0);
        for (int i = 0; i < gcs_entry.entries_size(); i += 2) {
          const auto &key = gcs_entry.entries(i);
          // Not const, so that std::move below actually moves the pointer.
          auto value = std::make_shared<Data>();
          value->ParseFromString(gcs_entry.entries(i + 1));
          results.emplace(key, std::move(value));
        }
      }
      lookup(client_, id, results);
    }
  };
  std::vector<uint8_t> nil;
  return GetRedisContext(id)->RunAsync("RAY.TABLE_LOOKUP", id, nil.data(), nil.size(),
                                       prefix_, pubsub_channel_, std::move(callback));
}
/// Subscribe to changes of the hashes in this table.
///
/// May only be called once per instance (checked fatally). The same callback is
/// registered on every shard context.
///
/// \param job_id The ID of the job. Not used by this implementation.
/// \param node_id Passed to `SubscribeAsync` on every shard context.
/// \param subscribe Optional callback called for each received message with a
/// one-element vector of `MapNotification`. For `REMOVE` notifications the map
/// values are null pointers; otherwise they hold the parsed entry values.
/// \param done Optional callback called for the reply that carries no
/// notification data, i.e. the acknowledgement of the subscription request.
/// \return The first non-OK status returned by `SubscribeAsync`, else OK.
template <typename ID, typename Data>
Status Hash<ID, Data>::Subscribe(const JobID &job_id, const NodeID &node_id,
                                 const HashNotificationCallback &subscribe,
                                 const SubscriptionCallback &done) {
  RAY_CHECK(subscribe_callback_index_ == -1)
      << "Client called Subscribe twice on the same table";
  auto callback = [this, subscribe, done](std::shared_ptr<CallbackReply> reply) {
    const auto data = reply->ReadAsPubsubData();
    if (data.empty()) {
      // No notification data is provided. This is the callback for the
      // initial subscription request.
      if (done != nullptr) {
        done(client_);
      }
    } else {
      // Data is provided. This is the callback for a message.
      if (subscribe != nullptr) {
        // Parse the notification.
        GcsEntry gcs_entry;
        gcs_entry.ParseFromString(data);
        ID id = ID::FromBinary(gcs_entry.id());
        DataMap data_map;
        if (gcs_entry.change_mode() == GcsChangeMode::REMOVE) {
          // Removals only carry keys; the values are left as null pointers.
          for (const auto &key : gcs_entry.entries()) {
            data_map.emplace(key, std::shared_ptr<Data>());
          }
        } else {
          // Entries alternate between key and serialized value.
          RAY_CHECK(gcs_entry.entries_size() % 2 == 0);
          for (int i = 0; i < gcs_entry.entries_size(); i += 2) {
            const auto &key = gcs_entry.entries(i);
            // Not const, so that std::move below actually moves the pointer.
            auto value = std::make_shared<Data>();
            value->ParseFromString(gcs_entry.entries(i + 1));
            data_map.emplace(key, std::move(value));
          }
        }
        MapNotification<std::string, Data> notification(gcs_entry.change_mode(),
                                                        data_map);
        std::vector<MapNotification<std::string, Data>> notification_vec;
        notification_vec.emplace_back(std::move(notification));
        subscribe(client_, id, notification_vec);
      }
    }
  };
  // Mark the table as subscribed before issuing the requests; the index is
  // also handed to SubscribeAsync by pointer.
  subscribe_callback_index_ = 1;
  for (auto &context : shard_contexts_) {
    RAY_RETURN_NOT_OK(context->SubscribeAsync(node_id, pubsub_channel_, callback,
                                              &subscribe_callback_index_));
  }
  return Status::OK();
}
std::string ProfileTable::DebugString() const {
return Log<UniqueID, ProfileTableData>::DebugString();
}
/// Register the callback to call on node changes and replay the cached nodes.
///
/// May only be called once (checked fatally). After storing the callback it is
/// immediately invoked for every cached node with a non-nil ID.
///
/// \param callback The callback to invoke with (node ID, node info).
void NodeTable::RegisterNodeChangeCallback(const NodeChangeCallback &callback) {
  RAY_CHECK(node_change_callback_ == nullptr);
  node_change_callback_ = callback;
  // Call the callback for any added clients that are cached.
  for (const auto &entry : node_cache_) {
    if (entry.first.IsNil()) {
      continue;
    }
    RAY_CHECK(entry.second.state() == GcsNodeInfo::ALIVE ||
              entry.second.state() == GcsNodeInfo::DEAD);
    node_change_callback_(entry.first, entry.second);
  }
}
/// Handle a node table notification: update the cache and, when the
/// notification carries new information, the removed-node set and the
/// registered node change callback.
///
/// A notification is new when the node is not cached yet, or when the cached
/// node was alive and the notification reports it dead. A notification that
/// re-adds a node already cached as dead is a fatal error.
///
/// \param client The GCS client. Not used by this implementation.
/// \param node_info The node information carried by the notification.
void NodeTable::HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info) {
  NodeID node_id = NodeID::FromBinary(node_info.node_id());
  bool is_alive = (node_info.state() == GcsNodeInfo::ALIVE);
  // It's possible to get duplicate notifications from the client table, so
  // check whether this notification is new.
  auto entry = node_cache_.find(node_id);
  bool is_notif_new;
  if (entry == node_cache_.end()) {
    // If the entry is not in the cache, then the notification is new.
    is_notif_new = true;
  } else {
    // If the entry is in the cache, then the notification is new only if the
    // node was alive and is now dead.
    bool was_alive = (entry->second.state() == GcsNodeInfo::ALIVE);
    is_notif_new = was_alive && !is_alive;
    // Once a node with a given ID has been removed, it should never be added
    // again. If the entry was in the cache and the node was deleted, check
    // that this new notification is not an insertion.
    if (!was_alive) {
      RAY_CHECK(!is_alive)
          << "Notification for addition of a node that was already removed:" << node_id;
    }
  }
  // Add the notification to our cache. Notifications are idempotent.
  RAY_LOG(DEBUG) << "[NodeTableNotification] NodeTable Insertion/Deletion "
                    "notification for node id "
                 << node_id << ". IsAlive: " << is_alive
                 << ". Setting the node cache to data.";
  // Look the slot up once and overwrite it in place; `entry` is not used past
  // this point, so a rehash triggered by the insertion is harmless.
  GcsNodeInfo &cache_data = node_cache_[node_id];
  cache_data = node_info;
  // If the notification is new, call any registered callbacks.
  if (is_notif_new) {
    if (is_alive) {
      RAY_CHECK(removed_nodes_.find(node_id) == removed_nodes_.end());
    } else {
      // NOTE(swang): The node should be added to this data structure before
      // the callback gets called, in case the callback depends on the data
      // structure getting updated.
      removed_nodes_.insert(node_id);
    }
    if (node_change_callback_ != nullptr) {
      node_change_callback_(node_id, cache_data);
    }
  }
}
/// Get the ID of the local node.
///
/// Fatally checks that the local node ID is not nil.
///
/// \return A reference to the stored local node ID.
const NodeID &NodeTable::GetLocalNodeId() const {
  RAY_CHECK(!local_node_id_.IsNil());
  const NodeID &local_id = local_node_id_;
  return local_id;
}
/// Get the information of the local node.
///
/// \return A reference to the stored local node information.
const GcsNodeInfo &NodeTable::GetLocalNode() const {
  const GcsNodeInfo &local_info = local_node_info_;
  return local_info;
}
/// Check whether a node has been recorded as removed.
///
/// \param node_id The ID of the node to check.
/// \return Whether `node_id` is present in the set of removed nodes.
bool NodeTable::IsRemoved(const NodeID &node_id) const {
  const auto occurrences = removed_nodes_.count(node_id);
  return occurrences == 1;
}
/// Connect the local node by synchronously appending its info to the node log.
///
/// Fatally checks that this table has not been disconnected, that no local node
/// is registered yet, and that `local_node_info` is in the ALIVE state. The
/// local node ID and info are only stored when the append succeeds.
///
/// \param local_node_info The information of the local node.
/// \return Status of the synchronous append.
Status NodeTable::Connect(const GcsNodeInfo &local_node_info) {
  RAY_CHECK(!disconnected_) << "Tried to reconnect a disconnected node.";
  RAY_CHECK(local_node_id_.IsNil()) << "This node is already connected.";
  RAY_CHECK(local_node_info.state() == GcsNodeInfo::ALIVE);
  Status status = SyncAppend(JobID::Nil(), node_log_key_,
                             std::make_shared<GcsNodeInfo>(local_node_info));
  if (!status.ok()) {
    return status;
  }
  local_node_id_ = NodeID::FromBinary(local_node_info.node_id());
  local_node_info_ = local_node_info;
  return status;
}
/// Disconnect the local node by synchronously appending a DEAD entry for it to
/// the node log.
///
/// The stored local node info is only switched to DEAD, and this table only
/// marked as disconnected, once the append has succeeded; a failed append
/// leaves the local state untouched.
///
/// \return Status of the synchronous append.
Status NodeTable::Disconnect() {
  // Mark a copy as DEAD so that a failed append does not leave
  // local_node_info_ reporting DEAD while disconnected_ is still false.
  auto node_info_ptr = std::make_shared<GcsNodeInfo>(local_node_info_);
  node_info_ptr->set_state(GcsNodeInfo::DEAD);
  Status status = SyncAppend(JobID::Nil(), node_log_key_, node_info_ptr);
  if (status.ok()) {
    // We successfully added the deletion entry. Mark ourselves as disconnected.
    local_node_info_.set_state(GcsNodeInfo::DEAD);
    disconnected_ = true;
  }
  return status;
}
/// Asynchronously append an ALIVE entry for a node to the node log.
///
/// Fatally checks that `node_info` is in the ALIVE state.
///
/// \param node_info The information of the node to mark as connected.
/// \param done Callback forwarded to `Append`.
/// \return Status returned by `Append`.
ray::Status NodeTable::MarkConnected(const GcsNodeInfo &node_info,
                                     const WriteCallback &done) {
  RAY_CHECK(node_info.state() == GcsNodeInfo::ALIVE);
  return Append(JobID::Nil(), node_log_key_, std::make_shared<GcsNodeInfo>(node_info),
                done);
}
/// Asynchronously append a DEAD entry for a node to the node log.
///
/// The appended entry only carries the node ID and the DEAD state.
///
/// \param dead_node_id The ID of the node to mark as disconnected.
/// \param done Callback forwarded to `Append`.
/// \return Status returned by `Append`.
ray::Status NodeTable::MarkDisconnected(const NodeID &dead_node_id,
                                        const WriteCallback &done) {
  GcsNodeInfo dead_node;
  dead_node.set_state(GcsNodeInfo::DEAD);
  dead_node.set_node_id(dead_node_id.Binary());
  return Append(JobID::Nil(), node_log_key_, std::make_shared<GcsNodeInfo>(dead_node),
                done);
}
/// Subscribe to node changes.
///
/// Subscribes to the node log and, once the subscription is acknowledged,
/// requests notifications for the node log key. When that request completes,
/// `done` is called (if set) and `subscribe` is registered as the node change
/// callback.
///
/// \param subscribe Callback registered via `RegisterNodeChangeCallback`.
/// \param done Optional callback called with the status of the notification
/// request.
/// \return Status returned by `Subscribe`.
ray::Status NodeTable::SubscribeToNodeChange(
    const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe, const StatusCallback &done) {
  // Callback for a notification from the client table.
  auto on_subscribe = [this](RedisGcsClient *client, const UniqueID &log_key,
                             const std::vector<GcsNodeInfo> &notifications) {
    RAY_CHECK(log_key == node_log_key_);
    std::unordered_map<std::string, GcsNodeInfo> connected_nodes;
    std::unordered_map<std::string, GcsNodeInfo> disconnected_nodes;
    // This is temporary fix for Issue 4140 to avoid connect to dead nodes.
    // TODO(yuhguo): remove this temporary fix after GCS entry is removable.
    for (auto &notification : notifications) {
      if (notification.state() != GcsNodeInfo::ALIVE) {
        // A non-alive entry cancels any earlier alive entry of the same node
        // in this batch.
        connected_nodes.erase(notification.node_id());
        disconnected_nodes.emplace(notification.node_id(), notification);
        continue;
      }
      connected_nodes.emplace(notification.node_id(), notification);
    }
    // Surviving alive entries are handled first, then the non-alive ones.
    for (const auto &alive_node : connected_nodes) {
      HandleNotification(client, alive_node.second);
    }
    for (const auto &dead_node : disconnected_nodes) {
      HandleNotification(client, dead_node.second);
    }
  };
  // Callback to request notifications from the client table once we've
  // successfully subscribed.
  auto on_done = [this, subscribe, done](RedisGcsClient *client) {
    auto on_request_notification_done = [this, subscribe, done](Status status) {
      RAY_CHECK_OK(status);
      if (done != nullptr) {
        done(status);
      }
      // Register node change callbacks after RequestNotification finishes.
      RegisterNodeChangeCallback(subscribe);
    };
    RAY_CHECK_OK(RequestNotifications(JobID::Nil(), node_log_key_, subscribe_id_,
                                      on_request_notification_done));
  };
  // Subscribe to the client table.
  return Subscribe(JobID::Nil(), subscribe_id_, on_subscribe, on_done);
}
/// Get the cached information of a node.
///
/// Fatally checks that `node_id` is not nil.
///
/// \param node_id The ID of the node to look up in the cache.
/// \param node_info Output; assigned the cached info when the node is found and
/// left untouched otherwise.
/// \return Whether the node is in the cache.
bool NodeTable::GetNode(const NodeID &node_id, GcsNodeInfo *node_info) const {
  RAY_CHECK(!node_id.IsNil());
  const auto entry = node_cache_.find(node_id);
  if (entry == node_cache_.end()) {
    return false;
  }
  *node_info = entry->second;
  return true;
}
/// Get the information of all cached nodes.
///
/// \return A reference to the node cache, keyed by node ID.
const std::unordered_map<NodeID, GcsNodeInfo> &NodeTable::GetAllNodes() const {
  const auto &all_nodes = node_cache_;
  return all_nodes;
}
/// Look up the node log asynchronously.
///
/// \param lookup Callback called with the entries of the node log. Must not be
/// null (checked fatally).
/// \return Status returned by `Log::Lookup`.
Status NodeTable::Lookup(const Callback &lookup) {
  RAY_CHECK(lookup != nullptr);
  const JobID nil_job_id = JobID::Nil();
  return Log::Lookup(nil_job_id, node_log_key_, lookup);
}
std::string NodeTable::DebugString() const {
std::stringstream result;
result << Log<NodeID, GcsNodeInfo>::DebugString();
result << ", cache size: " << node_cache_.size()
<< ", num removed: " << removed_nodes_.size();
return result.str();
}
/// Subscribe to the task lease table.
///
/// Adapts the vector-based notifications of the underlying table: every entry
/// is wrapped into a `boost::optional`, and an empty notification is reported
/// as a single empty optional.
///
/// \param job_id The ID of the job.
/// \param node_id Forwarded unchanged to the underlying `Subscribe`.
/// \param subscribe Callback called with the wrapped entries.
/// \param done Callback forwarded to the underlying `Subscribe`.
/// \return Status
Status TaskLeaseTable::Subscribe(const JobID &job_id, const NodeID &node_id,
                                 const Callback &subscribe,
                                 const SubscriptionCallback &done) {
  auto on_subscribe = [subscribe](RedisGcsClient *client, const TaskID &task_id,
                                  const std::vector<TaskLeaseData> &data) {
    std::vector<boost::optional<TaskLeaseData>> result;
    if (data.empty()) {
      // No lease data: hand the subscriber a single empty optional.
      result.emplace_back(boost::optional<TaskLeaseData>());
    } else {
      for (const auto &item : data) {
        result.emplace_back(boost::optional<TaskLeaseData>(item));
      }
    }
    subscribe(client, task_id, result);
  };
  return Table<TaskID, TaskLeaseData>::Subscribe(job_id, node_id, on_subscribe, done);
}
/// Synchronously collect the IDs of all actors stored under a table prefix.
///
/// Iterates the keyspace with `SCAN <cursor> match <prefix>* count 100` until
/// the returned cursor is 0. Only keys whose length is exactly the prefix length
/// plus `ActorID::Size()` are decoded. Duplicate keys returned by SCAN are
/// collapsed. Unexpected reply shapes are fatal.
///
/// \param redis_context The synchronous hiredis context to run SCAN on.
/// \param table_prefix The key prefix of the actor table.
/// \return The distinct actor IDs found, in unspecified order.
std::vector<ActorID> SyncGetAllActorID(redisContext *redis_context,
                                       const std::string &table_prefix) {
  std::unordered_set<ActorID> actor_id_set;
  // SCAN cursors are unsigned 64-bit values, so keep and format the cursor as
  // unsigned long long (hiredis accepts the `ll` length modifier).
  unsigned long long cursor = 0;
  do {
    auto r = redisCommand(redis_context, "SCAN %llu match %s* count 100", cursor,
                          table_prefix.c_str());
    auto reply = reinterpret_cast<redisReply *>(r);
    RAY_CHECK(reply != nullptr && reply->type == REDIS_REPLY_ARRAY);
    RAY_CHECK(reply->elements == 2);
    // current cursor
    redisReply *cursor_reply = reply->element[0];
    RAY_CHECK(cursor_reply != nullptr && cursor_reply->type == REDIS_REPLY_STRING);
    cursor = std::stoull(std::string(cursor_reply->str, cursor_reply->len));
    // actor ids
    redisReply *array_reply = reply->element[1];
    RAY_CHECK(array_reply != nullptr && array_reply->type == REDIS_REPLY_ARRAY);
    for (size_t i = 0; i < array_reply->elements; ++i) {
      redisReply *id_reply = array_reply->element[i];
      RAY_CHECK(id_reply != nullptr && id_reply->type == REDIS_REPLY_STRING);
      auto id_with_prefix = std::string(id_reply->str, id_reply->len);
      // The key of actor_checkpoint table and actor_checkpoint_id table have the same
      // prefix of `ACTOR`, so we should check the length of the key to filter them.
      if (id_with_prefix.size() == table_prefix.size() + ActorID::Size()) {
        auto id = ActorID::FromBinary(id_with_prefix.substr(table_prefix.size()));
        actor_id_set.emplace(id);
      }
    }
    // Replies returned by redisCommand are owned by the caller; release this
    // page (including its nested elements) before fetching the next one.
    freeReplyObject(reply);
  } while (cursor != 0);
  return std::vector<ActorID>(actor_id_set.begin(), actor_id_set.end());
}
/// Get the IDs of all actors stored in this table.
///
/// Scans the primary Redis context synchronously for keys that start with this
/// table's prefix name.
///
/// \return The actor IDs found by `SyncGetAllActorID`.
std::vector<ActorID> LogBasedActorTable::GetAllActorID() {
  const std::string table_prefix = TablePrefix_Name(prefix_);
  return SyncGetAllActorID(client_->primary_context()->sync_context(), table_prefix);
}
/// Synchronously get the latest data of an actor.
///
/// Reads the last element of the list stored at the actor's key
/// (`LRANGE key -1 -1`) and parses it into `actor_table_data`.
///
/// \param actor_id The ID of the actor to look up.
/// \param actor_table_data Output; must not be null (checked fatally).
/// \return `IOError` when there is no reply, the reply is nil, or the list is
/// empty; OK otherwise.
Status LogBasedActorTable::Get(const ray::ActorID &actor_id,
                               ray::rpc::ActorTableData *actor_table_data) {
  RAY_CHECK(actor_table_data != nullptr);
  // Both "no reply" and "empty list" are reported with the same error.
  const auto not_found = [&actor_id]() {
    return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex());
  };
  auto key = TablePrefix_Name(prefix_) + actor_id.Binary();
  auto reply = GetRedisContext(actor_id)->RunArgvSync({"LRANGE", key, "-1", "-1"});
  if (!reply || reply->IsNil()) {
    return not_found();
  }
  const auto &data_list = reply->ReadAsStringArray();
  if (data_list.empty()) {
    return not_found();
  }
  RAY_CHECK(data_list.size() == 1);
  actor_table_data->ParseFromString(data_list.front());
  return Status::OK();
}
/// Get the IDs of all actors stored in this table.
///
/// Scans the primary Redis context synchronously for keys that start with this
/// table's prefix name.
///
/// \return The actor IDs found by `SyncGetAllActorID`.
std::vector<ActorID> ActorTable::GetAllActorID() {
  const std::string table_prefix = TablePrefix_Name(prefix_);
  return SyncGetAllActorID(client_->primary_context()->sync_context(), table_prefix);
}
/// Synchronously get the data of an actor.
///
/// Reads the value stored at the actor's key (`GET key`) and parses it into
/// `actor_table_data`.
///
/// \param actor_id The ID of the actor to look up.
/// \param actor_table_data Output; must not be null (checked fatally).
/// \return `IOError` when there is no reply or the reply is nil; OK otherwise.
Status ActorTable::Get(const ray::ActorID &actor_id,
                       ray::rpc::ActorTableData *actor_table_data) {
  RAY_CHECK(actor_table_data != nullptr);
  const auto key = TablePrefix_Name(prefix_) + actor_id.Binary();
  const auto reply = GetRedisContext(actor_id)->RunArgvSync({"GET", key});
  const bool has_value = reply && !reply->IsNil();
  if (!has_value) {
    return Status::IOError("Failed to get actor data by actor_id " + actor_id.Hex());
  }
  actor_table_data->ParseFromString(reply->ReadAsString());
  return Status::OK();
}
// Explicit instantiations of the Log / Set / Table / Hash templates for each
// ID and data type pairing listed below.
// NOTE(review): presumably required because the template member definitions
// live in this source file rather than in the header — confirm.
template class Log<ObjectID, ObjectTableData>;
template class Set<ObjectID, ObjectTableData>;
template class Log<TaskID, TaskTableData>;
template class Table<TaskID, TaskTableData>;
template class Log<ActorID, ActorTableData>;
template class Log<TaskID, TaskReconstructionData>;
template class Table<TaskID, TaskLeaseData>;
template class Table<NodeID, HeartbeatTableData>;
template class Table<NodeID, ResourceUsageBatchData>;
template class Log<NodeID, GcsNodeInfo>;
template class Log<JobID, JobTableData>;
template class Log<UniqueID, ProfileTableData>;
template class Log<NodeID, HeartbeatTableData>;
template class Log<NodeID, ResourceUsageBatchData>;
template class Log<WorkerID, WorkerTableData>;
template class Table<WorkerID, WorkerTableData>;
template class Table<ActorID, ActorTableData>;
template class Log<NodeID, ResourceTableData>;
template class Hash<NodeID, ResourceTableData>;
} // namespace gcs
} // namespace ray
-978
View File
@@ -1,978 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "ray/common/constants.h"
#include "ray/common/id.h"
#include "ray/common/status.h"
#include "ray/gcs/callback.h"
#include "ray/gcs/entry_change_notification.h"
#include "ray/gcs/redis_context.h"
#include "ray/util/logging.h"
#include "src/ray/protobuf/gcs.pb.h"
struct redisAsyncContext;
namespace ray {
namespace gcs {
// Bring the message and enum types from the `rpc` namespace into `ray::gcs` so
// that the table declarations below can name them unqualified.
using rpc::ActorTableData;
using rpc::ErrorTableData;
using rpc::GcsChangeMode;
using rpc::GcsEntry;
using rpc::GcsNodeInfo;
using rpc::HeartbeatTableData;
using rpc::JobTableData;
using rpc::ObjectTableData;
using rpc::ProfileTableData;
using rpc::ResourceTableData;
using rpc::ResourceUsageBatchData;
using rpc::TablePrefix;
using rpc::TablePubsub;
using rpc::TaskLeaseData;
using rpc::TaskReconstructionData;
using rpc::TaskTableData;
using rpc::WorkerTableData;
// Forward declarations; the tables below only hold pointers to these types.
class RedisContext;
class RedisGcsClient;
/// Specifies whether commands issued to a table should be regular or chain-replicated
/// (when available).
///
/// `kRegular` is the default used by `Log` (see `Log::command_type_`).
/// NOTE(review): the intended use of `kUnknown` is not visible in this file —
/// confirm against the code that maps a CommandType to a Redis command.
enum class CommandType { kRegular, kChain, kUnknown };
/// \class PubsubInterface
///
/// Abstract interface of a pubsub storage system. A client of a storage system
/// implementing this interface can request notifications for specific keys and
/// cancel them again.
template <typename ID>
class PubsubInterface {
 public:
  /// Request notifications about a key.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the key to request notifications for.
  /// \param node_id The node who is requesting notifications.
  /// \param done Callback that is called when the request is complete.
  /// \return Status
  virtual Status RequestNotifications(const JobID &job_id, const ID &id,
                                      const NodeID &node_id,
                                      const StatusCallback &done) = 0;

  /// Cancel notifications about a key.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the key to cancel notifications for.
  /// \param node_id The node who originally requested notifications.
  /// \param done Callback that is called when the cancellation is complete.
  /// \return Status
  virtual Status CancelNotifications(const JobID &job_id, const ID &id,
                                     const NodeID &node_id,
                                     const StatusCallback &done) = 0;

  virtual ~PubsubInterface() = default;
};
/// \class LogInterface
///
/// Abstract interface for appending entries to a per-key log.
template <typename ID, typename Data>
class LogInterface {
 public:
  /// Callback invoked with the client, the key and the data of a write.
  using WriteCallback =
      std::function<void(RedisGcsClient *client, const ID &id, const Data &data)>;

  /// Append a log entry to a key.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the key to append to.
  /// \param data Data to append to the log.
  /// \param done Callback that is called once the data has been written.
  /// \return Status
  virtual Status Append(const JobID &job_id, const ID &id,
                        const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;

  /// Append a log entry to a key if the log currently has `log_length` entries.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the key to append to.
  /// \param data Data to append to the log.
  /// \param done Callback that is called if the data was appended.
  /// \param failure Callback that is called if the data was not appended.
  /// \param log_length The number of entries the log must have for the append
  /// to succeed.
  /// \return Status
  virtual Status AppendAt(const JobID &job_id, const ID &id,
                          const std::shared_ptr<Data> &data, const WriteCallback &done,
                          const WriteCallback &failure, int log_length) = 0;

  virtual ~LogInterface() = default;
};
/// \class Log
///
/// A GCS table where every entry is an append-only log. This class is not
/// meant to be used directly. All log classes should derive from this class
/// and override the prefix_ member with a unique prefix for that log, and the
/// pubsub_channel_ member if pubsub is required.
///
/// Example tables backed by Log:
/// NodeTable: Stores a log of which GCS clients have been added or deleted
/// from the system.
template <typename ID, typename Data>
class Log : public LogInterface<ID, Data>, virtual public PubsubInterface<ID> {
public:
/// The callback to call with the entries found at a key.
using Callback = std::function<void(RedisGcsClient *client, const ID &id,
const std::vector<Data> &data)>;
/// Like `Callback`, but additionally receives the change mode of the
/// notification.
using NotificationCallback =
std::function<void(RedisGcsClient *client, const ID &id,
const GcsChangeMode change_mode, const std::vector<Data> &data)>;
/// The callback to call when a write to a key succeeds.
using WriteCallback = typename LogInterface<ID, Data>::WriteCallback;
/// The callback to call when a SUBSCRIBE call completes and we are ready to
/// request and receive notifications.
using SubscriptionCallback = std::function<void(RedisGcsClient *client)>;
/// Bundle of the state associated with one callback: the key, the data, the
/// callbacks themselves and the owning log and client.
struct CallbackData {
ID id;
std::shared_ptr<Data> data;
Callback callback;
// An optional callback to call for subscription operations, where the
// first message is a notification of subscription success.
SubscriptionCallback subscription_callback;
Log<ID, Data> *log;
RedisGcsClient *client;
};
/// Construct a log over the given shard contexts.
///
/// The pubsub channel starts as NO_PUBLISH, the prefix as UNUSED and the
/// subscribe callback index as -1 (not subscribed).
///
/// \param contexts The Redis contexts of the shards backing this log.
/// \param client The GCS client that is passed to callbacks.
Log(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
: shard_contexts_(contexts),
client_(client),
pubsub_channel_(TablePubsub::NO_PUBLISH),
prefix_(TablePrefix::UNUSED),
subscribe_callback_index_(-1){};
/// Append a log entry to a key.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data that is added to the GCS.
/// \param data Data to append to the log. TODO(rkn): This can be made const,
/// right?
/// \param done Callback that is called once the data has been written to the
/// GCS.
/// \return Status
Status Append(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
const WriteCallback &done);
/// Append a log entry to a key synchronously.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data that is added to the GCS.
/// \param data Data to append to the log.
/// \return Status
Status SyncAppend(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data);
/// Append a log entry to a key if and only if the log has the given number
/// of entries.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data that is added to the GCS.
/// \param data Data to append to the log.
/// \param done Callback that is called if the data was appended to the log.
/// \param failure Callback that is called if the data was not appended to
/// the log because the log length did not match the given `log_length`.
/// \param log_length The number of entries that the log must have for the
/// append to succeed.
/// \return Status
Status AppendAt(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
const WriteCallback &done, const WriteCallback &failure,
int log_length);
/// Lookup the log values at a key asynchronously.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data that is looked up in the GCS.
/// \param lookup Callback that is called after lookup. If the callback is
/// called with an empty vector, then there was no data at the key.
/// \return Status
Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup);
/// Subscribe to any Append operations to this table. The caller may choose
/// to subscribe to all Appends, or to subscribe only to keys that it
/// requests notifications for. This may only be called once per Log
/// instance.
///
/// \param job_id The ID of the job.
/// \param node_id The type of update to listen to. If this is nil, then a
/// message for each Add to the table will be received. Else, only
/// messages for the given node will be received. In the latter
/// case, the node may request notifications on specific keys in the
/// table via `RequestNotifications`.
/// \param subscribe Callback that is called on each received message. If the
/// callback is called with an empty vector, then there was no data at the key.
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
const SubscriptionCallback &done);
/// Request notifications about a key in this table.
///
/// The notifications will be returned via the subscribe callback that was
/// registered by `Subscribe`. An initial notification will be returned for
/// the current values at the key, if any, and a subsequent notification will
/// be published for every following `Append` to the key. Before
/// notifications can be requested, the caller must first call `Subscribe`,
/// with the same `node_id`.
///
/// \param job_id The ID of the job.
/// \param id The ID of the key to request notifications for.
/// \param node_id The node who is requesting notifications. Before
/// notifications can be requested, a call to `Subscribe` to this
/// table with the same `node_id` must complete successfully.
/// \param done Callback that is called when request notifications is complete.
/// \return Status
Status RequestNotifications(const JobID &job_id, const ID &id, const NodeID &node_id,
const StatusCallback &done);
/// Cancel notifications about a key in this table.
///
/// \param job_id The ID of the job.
/// \param id The ID of the key to request notifications for.
/// \param node_id The node who originally requested notifications.
/// \param done Callback that is called when cancel notifications is complete.
/// \return Status
Status CancelNotifications(const JobID &job_id, const ID &id, const NodeID &node_id,
const StatusCallback &done);
/// Subscribe to any modifications to the key. The caller may choose
/// to subscribe to all modifications, or to subscribe only to keys that it
/// requests notifications for. This may only be called once per Log
/// instance. This overload differs from the `Callback`-based `Subscribe`
/// above in that its `NotificationCallback` additionally receives the
/// change_mode of each notification. Therefore this function supports
/// notifications of remove operations.
///
/// \param job_id The ID of the job.
/// \param node_id The type of update to listen to. If this is nil, then a
/// message for each Add to the table will be received. Else, only
/// messages for the given node will be received. In the latter
/// case, the node may request notifications on specific keys in the
/// table via `RequestNotifications`.
/// \param subscribe Callback that is called on each received message. If the
/// callback is called with an empty vector, then there was no data at the key.
/// \param done Callback that is called when subscription is complete and we
/// are ready to receive messages.
/// \return Status
Status Subscribe(const JobID &job_id, const NodeID &node_id,
const NotificationCallback &subscribe,
const SubscriptionCallback &done);
/// Delete an entire key from redis.
///
/// \param job_id The ID of the job.
/// \param id The ID of the data to delete from the GCS.
/// \return Void.
void Delete(const JobID &job_id, const ID &id);
/// Delete several keys from redis.
///
/// \param job_id The ID of the job.
/// \param ids The vector of IDs to delete from the GCS.
/// \return Void.
void Delete(const JobID &job_id, const std::vector<ID> &ids);
/// Returns debug string for class.
///
/// \return string.
std::string DebugString() const;
protected:
/// Pick the shard context responsible for a key: the hash of the ID modulo
/// the number of shard contexts.
///
/// \param id The ID of the key.
/// \return The Redis context of the shard that the key maps to.
std::shared_ptr<RedisContext> GetRedisContext(const ID &id) {
static std::hash<ID> index;
return shard_contexts_[index(id) % shard_contexts_.size()];
}
/// The connection to the GCS.
std::vector<std::shared_ptr<RedisContext>> shard_contexts_;
/// The GCS client.
RedisGcsClient *client_;
/// The pubsub channel to subscribe to for notifications about keys in this
/// table. If no notifications are required, this should be set to
/// TablePubsub::NO_PUBLISH. If notifications are required, then this must be
/// unique across all instances of Log.
TablePubsub pubsub_channel_;
/// The prefix to use for keys in this table. This must be unique across all
/// instances of Log.
TablePrefix prefix_;
/// The index in the RedisCallbackManager for the callback that is called
/// when we receive notifications. This is >= 0 iff we have subscribed to the
/// table, otherwise -1.
int64_t subscribe_callback_index_;
/// Commands to a GCS table can either be regular (default) or chain-replicated.
CommandType command_type_ = CommandType::kRegular;
/// Counters reported by `DebugString`.
int64_t num_appends_ = 0;
int64_t num_lookups_ = 0;
};
/// \class TableInterface
///
/// Abstract interface for adding entries to a table.
template <typename ID, typename Data>
class TableInterface {
 public:
  /// Callback invoked with the client, the key and the data of a write.
  using WriteCallback = typename Log<ID, Data>::WriteCallback;

  /// Add an entry to the table.
  ///
  /// \param job_id The ID of the job.
  /// \param task_id The ID of the data that is added.
  /// \param data Data that is added.
  /// \param done Callback that is called once the data has been written.
  /// \return Status
  virtual Status Add(const JobID &job_id, const ID &task_id,
                     const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;

  virtual ~TableInterface() = default;
};
/// \class Table
///
/// A GCS table where every entry is a single data item. This class is not
/// meant to be used directly. All table classes should derive from this class
/// and override the prefix_ member with a unique prefix for that table, and
/// the pubsub_channel_ member if pubsub is required.
///
/// Log<ID, Data> is a private base; the Log members that callers may use are
/// re-exported below with using-declarations.
///
/// Example tables derived from Table:
///     TaskTable: Stores Task metadata needed for executing the task.
template <typename ID, typename Data>
class Table : private Log<ID, Data>,
              public TableInterface<ID, Data>,
              virtual public PubsubInterface<ID> {
 public:
  /// The callback that receives a single entry of the table.
  using Callback =
      std::function<void(RedisGcsClient *client, const ID &id, const Data &data)>;
  using WriteCallback = typename Log<ID, Data>::WriteCallback;
  /// The callback to call when a Lookup call returns an empty entry.
  using FailureCallback = std::function<void(RedisGcsClient *client, const ID &id)>;
  /// The callback to call when a Subscribe call completes and we are ready to
  /// request and receive notifications.
  using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;

  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  Table(const std::vector<std::shared_ptr<RedisContext>> &contexts,
        RedisGcsClient *client)
      : Log<ID, Data>(contexts, client) {}

  using Log<ID, Data>::RequestNotifications;
  using Log<ID, Data>::CancelNotifications;
  /// Expose this interface for use by subscription tools class SubscriptionExecutor.
  /// In this way TaskTable() can also reuse class SubscriptionExecutor.
  using Log<ID, Data>::Subscribe;

  /// Add an entry to the table. This overwrites any existing data at the key.
  ///
  /// Implements TableInterface::Add; marked `override` so that any drift
  /// between the two signatures becomes a compile error.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is added to the GCS.
  /// \param data Data that is added to the GCS.
  /// \param done Callback that is called once the data has been written to the
  /// GCS.
  /// \return Status
  Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
             const WriteCallback &done) override;

  /// Lookup an entry asynchronously.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is looked up in the GCS.
  /// \param lookup Callback that is called after lookup if there was data at
  /// the key.
  /// \param failure Callback that is called after lookup if there was no data
  /// at the key.
  /// \return Status
  Status Lookup(const JobID &job_id, const ID &id, const Callback &lookup,
                const FailureCallback &failure);

  /// Subscribe to any Add operations to this table. The caller may choose to
  /// subscribe to all Adds, or to subscribe only to keys that it requests
  /// notifications for. This may only be called once per Table instance.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each Add to the table will be received. Else, only
  /// messages for the given node will be received. In the latter
  /// case, the node may request notifications on specific keys in the
  /// table via `RequestNotifications`.
  /// \param subscribe Callback that is called on each received message. If the
  /// callback is called with an empty vector, then there was no data at the key.
  /// \param failure Callback that is called if the key is empty at the time
  /// that notifications are requested.
  /// \param done Callback that is called when subscription is complete and we
  /// are ready to receive messages.
  /// \return Status
  Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
                   const FailureCallback &failure, const SubscriptionCallback &done);

  /// Same as the Subscribe overload above, but without a failure callback.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each Add to the table will be received. Else, only
  /// messages for the given node will be received. In the latter
  /// case, the node may request notifications on specific keys in the
  /// table via `RequestNotifications`.
  /// \param subscribe Callback that is called on each received message. If the
  /// callback is called with an empty vector, then there was no data at the key.
  /// \param done Callback that is called when subscription is complete and we
  /// are ready to receive messages.
  /// \return Status
  Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
                   const SubscriptionCallback &done);

  /// Delete a single entry; forwards to Log::Delete.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the entry to delete.
  void Delete(const JobID &job_id, const ID &id) { Log<ID, Data>::Delete(job_id, id); }

  /// Delete several entries; forwards to Log::Delete.
  ///
  /// \param job_id The ID of the job.
  /// \param ids The IDs of the entries to delete.
  void Delete(const JobID &job_id, const std::vector<ID> &ids) {
    Log<ID, Data>::Delete(job_id, ids);
  }

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;

 protected:
  // Log state made reachable for classes deriving from Table.
  using Log<ID, Data>::shard_contexts_;
  using Log<ID, Data>::client_;
  using Log<ID, Data>::pubsub_channel_;
  using Log<ID, Data>::prefix_;
  using Log<ID, Data>::command_type_;
  using Log<ID, Data>::GetRedisContext;

  /// Counters owned by Table. Note that num_lookups_ is a separate member
  /// from the one declared in Log.
  int64_t num_adds_ = 0;
  int64_t num_lookups_ = 0;
};
/// \class SetInterface
///
/// Abstract write interface of a GCS set keyed by ID and storing Data.
template <typename ID, typename Data>
class SetInterface {
 public:
  /// Write-completion callback type, taken from Log<ID, Data>.
  using WriteCallback = typename Log<ID, Data>::WriteCallback;

  /// Add an entry to the set.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the set.
  /// \param data The data to add.
  /// \param done Write-completion callback.
  /// \return Status
  virtual Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
                     const WriteCallback &done) = 0;

  /// Remove an entry from the set.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the set.
  /// \param data The data to remove.
  /// \param done Write-completion callback.
  /// \return Status
  virtual Status Remove(const JobID &job_id, const ID &id,
                        const std::shared_ptr<Data> &data, const WriteCallback &done) = 0;

  virtual ~SetInterface() = default;
};
/// \class Set
///
/// A GCS table where every entry is an addable & removable set. This class is not
/// meant to be used directly. All set classes should derive from this class
/// and override the prefix_ member with a unique prefix for that set, and the
/// pubsub_channel_ member if pubsub is required.
///
/// Log<ID, Data> is a private base; the Log members that callers may use are
/// re-exported below with using-declarations.
///
/// Example tables backed by Set:
///     ObjectTable: Stores a set of which clients have added an object.
template <typename ID, typename Data>
class Set : private Log<ID, Data>,
            public SetInterface<ID, Data>,
            virtual public PubsubInterface<ID> {
 public:
  using Callback = typename Log<ID, Data>::Callback;
  using WriteCallback = typename Log<ID, Data>::WriteCallback;
  using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;

  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  Set(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
      : Log<ID, Data>(contexts, client) {}

  using Log<ID, Data>::RequestNotifications;
  using Log<ID, Data>::CancelNotifications;
  using Log<ID, Data>::Lookup;
  using Log<ID, Data>::Delete;

  /// Add an entry to the set.
  ///
  /// Implements SetInterface::Add; marked `override` so that any drift
  /// between the two signatures becomes a compile error.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is added to the GCS.
  /// \param data Data to add to the set.
  /// \param done Callback that is called once the data has been written to the
  /// GCS.
  /// \return Status
  Status Add(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
             const WriteCallback &done) override;

  /// Remove an entry from the set.
  ///
  /// Implements SetInterface::Remove.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is removed from the GCS.
  /// \param data Data to remove from the set.
  /// \param done Callback that is called once the data has been written to the
  /// GCS.
  /// \return Status
  Status Remove(const JobID &job_id, const ID &id, const std::shared_ptr<Data> &data,
                const WriteCallback &done) override;

  /// The callback that receives the change notifications of a subscription.
  using NotificationCallback =
      std::function<void(RedisGcsClient *client, const ID &id,
                         const std::vector<ArrayNotification<Data>> &data)>;

  /// Subscribe to any add or remove operations to this table.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each add or remove to the table will be received. Else, only
  /// messages for the given node will be received. In the latter
  /// case, the node may request notifications on specific keys in the
  /// table via `RequestNotifications`.
  /// \param subscribe Callback that is called on each received message.
  /// \param done Callback that is called when subscription is complete and we
  /// are ready to receive messages.
  /// \return Status
  Status Subscribe(const JobID &job_id, const NodeID &node_id,
                   const NotificationCallback &subscribe,
                   const SubscriptionCallback &done);

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;

 protected:
  // Log state made reachable for classes deriving from Set.
  using Log<ID, Data>::shard_contexts_;
  using Log<ID, Data>::client_;
  using Log<ID, Data>::pubsub_channel_;
  using Log<ID, Data>::prefix_;
  using Log<ID, Data>::GetRedisContext;

  /// Counters owned by Set; the lookup counter is the one declared in Log.
  int64_t num_adds_ = 0;
  int64_t num_removes_ = 0;
  using Log<ID, Data>::num_lookups_;
};
/// \class HashInterface
///
/// Abstract interface of a GCS hash table: each ID maps to a collection of
/// string-keyed Data entries.
template <typename ID, typename Data>
class HashInterface {
 public:
  /// String key to data mapping exchanged with the hash table.
  using DataMap = std::unordered_map<std::string, std::shared_ptr<Data>>;
  // Reuse Log's SubscriptionCallback when Subscribe is successfully called.
  using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;

  /// The callback function used by function Update & Lookup.
  ///
  /// \param client The client on which Update or Lookup is called.
  /// \param id The ID of the Hash Table the call refers to.
  /// \param pairs Map data contains the change to the Hash Table.
  /// \return Void
  using HashCallback =
      std::function<void(RedisGcsClient *client, const ID &id, const DataMap &pairs)>;

  /// The callback function used by function RemoveEntries.
  ///
  /// \param client The client on which the RemoveEntries is called.
  /// \param id The ID of the Hash Table whose entries are removed.
  /// \param keys The keys that are removed from this Hash Table.
  /// \return Void
  using HashRemoveCallback = std::function<void(RedisGcsClient *client, const ID &id,
                                                const std::vector<std::string> &keys)>;

  /// The notification function used by function Subscribe.
  ///
  /// \param client The client on which the Subscribe is called.
  /// \param id The ID of the Hash Table the notification refers to.
  /// \param data The notifications describing the change to the Hash Table.
  /// \return Void
  using HashNotificationCallback =
      std::function<void(RedisGcsClient *client, const ID &id,
                         const std::vector<MapNotification<std::string, Data>> &data)>;

  /// Add entries of a hash table.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is added to the GCS.
  /// \param pairs Map data to add to the hash table.
  /// \param done HashCallback that is called once the request data has been written to
  /// the GCS.
  /// \return Status
  virtual Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
                        const HashCallback &done) = 0;

  /// Remove entries from the hash table.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is removed from the GCS.
  /// \param keys The entry keys of the hash table.
  /// \param remove_callback HashRemoveCallback that is called once the data has been
  /// written to the GCS no matter whether the key exists in the hash table.
  /// \return Status
  virtual Status RemoveEntries(const JobID &job_id, const ID &id,
                               const std::vector<std::string> &keys,
                               const HashRemoveCallback &remove_callback) = 0;

  /// Lookup the map data of a hash table.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the data that is looked up in the GCS.
  /// \param lookup HashCallback that is called after lookup. If the callback is
  /// called with an empty hash table, then there was no data at the key.
  /// \return Status
  virtual Status Lookup(const JobID &job_id, const ID &id,
                        const HashCallback &lookup) = 0;

  /// Subscribe to any Update or Remove operations to this hash table.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The type of update to listen to. If this is nil, then a
  /// message for each Update to the table will be received. Else, only
  /// messages for the given node will be received. In the latter
  /// case, the node may request notifications on specific keys in the
  /// table via `RequestNotifications`.
  /// \param subscribe HashNotificationCallback that is called on each received message.
  /// \param done SubscriptionCallback that is called when subscription is complete and
  /// we are ready to receive messages.
  /// \return Status
  virtual Status Subscribe(const JobID &job_id, const NodeID &node_id,
                           const HashNotificationCallback &subscribe,
                           const SubscriptionCallback &done) = 0;

  virtual ~HashInterface() = default;
};
/// \class Hash
///
/// Implementation of HashInterface that uses Log<ID, Data> as a private base.
/// Derived classes are expected to set prefix_ and, if needed,
/// pubsub_channel_ (see DynamicResourceTable).
template <typename ID, typename Data>
class Hash : private Log<ID, Data>,
             public HashInterface<ID, Data>,
             virtual public PubsubInterface<ID> {
 public:
  // Type aliases mirrored from the interface and from Log.
  using DataMap = typename HashInterface<ID, Data>::DataMap;
  using HashCallback = typename HashInterface<ID, Data>::HashCallback;
  using HashRemoveCallback = typename HashInterface<ID, Data>::HashRemoveCallback;
  using HashNotificationCallback =
      typename HashInterface<ID, Data>::HashNotificationCallback;
  using SubscriptionCallback = typename Log<ID, Data>::SubscriptionCallback;

  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  Hash(const std::vector<std::shared_ptr<RedisContext>> &contexts, RedisGcsClient *client)
      : Log<ID, Data>(contexts, client) {}

  using Log<ID, Data>::RequestNotifications;
  using Log<ID, Data>::CancelNotifications;

  /// See HashInterface::Update.
  Status Update(const JobID &job_id, const ID &id, const DataMap &pairs,
                const HashCallback &done) override;

  /// See HashInterface::Subscribe.
  Status Subscribe(const JobID &job_id, const NodeID &node_id,
                   const HashNotificationCallback &subscribe,
                   const SubscriptionCallback &done) override;

  /// See HashInterface::Lookup.
  Status Lookup(const JobID &job_id, const ID &id, const HashCallback &lookup) override;

  /// See HashInterface::RemoveEntries.
  Status RemoveEntries(const JobID &job_id, const ID &id,
                       const std::vector<std::string> &keys,
                       const HashRemoveCallback &remove_callback) override;

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;

 protected:
  // Log state made reachable for classes deriving from Hash.
  using Log<ID, Data>::shard_contexts_;
  using Log<ID, Data>::client_;
  using Log<ID, Data>::pubsub_channel_;
  using Log<ID, Data>::prefix_;
  using Log<ID, Data>::subscribe_callback_index_;
  using Log<ID, Data>::GetRedisContext;

  /// Counters owned by Hash; the lookup counter is the one declared in Log.
  int64_t num_adds_ = 0;
  int64_t num_removes_ = 0;
  using Log<ID, Data>::num_lookups_;
};
/// Hash table of ResourceTableData keyed by NodeID. Uses the NODE_RESOURCE
/// key prefix and publishes on NODE_RESOURCE_PUBSUB.
class DynamicResourceTable : public Hash<NodeID, ResourceTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Hash.
  /// \param client The GCS client, forwarded to Hash.
  DynamicResourceTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                       RedisGcsClient *client)
      : Hash<NodeID, ResourceTableData>(contexts, client) {
    prefix_ = TablePrefix::NODE_RESOURCE;
    pubsub_channel_ = TablePubsub::NODE_RESOURCE_PUBSUB;
  }

  virtual ~DynamicResourceTable() = default;
};
/// Set of ObjectTableData keyed by ObjectID. Uses the OBJECT key prefix and
/// publishes on OBJECT_PUBSUB.
class ObjectTable : public Set<ObjectID, ObjectTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Set.
  /// \param client The GCS client, forwarded to Set.
  ObjectTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
              RedisGcsClient *client)
      : Set<ObjectID, ObjectTableData>(contexts, client) {
    prefix_ = TablePrefix::OBJECT;
    pubsub_channel_ = TablePubsub::OBJECT_PUBSUB;
  }

  virtual ~ObjectTable() = default;
};
/// Table of HeartbeatTableData keyed by NodeID. Uses the HEARTBEAT key prefix
/// and publishes on HEARTBEAT_PUBSUB.
class HeartbeatTable : public Table<NodeID, HeartbeatTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  HeartbeatTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                 RedisGcsClient *client)
      : Table<NodeID, HeartbeatTableData>(contexts, client) {
    prefix_ = TablePrefix::HEARTBEAT;
    pubsub_channel_ = TablePubsub::HEARTBEAT_PUBSUB;
  }

  virtual ~HeartbeatTable() = default;
};
/// Table of ResourceUsageBatchData keyed by NodeID. Uses the
/// RESOURCE_USAGE_BATCH key prefix and publishes on
/// RESOURCE_USAGE_BATCH_PUBSUB.
class ResourceUsageBatchTable : public Table<NodeID, ResourceUsageBatchData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  ResourceUsageBatchTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                          RedisGcsClient *client)
      : Table<NodeID, ResourceUsageBatchData>(contexts, client) {
    prefix_ = TablePrefix::RESOURCE_USAGE_BATCH;
    pubsub_channel_ = TablePubsub::RESOURCE_USAGE_BATCH_PUBSUB;
  }

  virtual ~ResourceUsageBatchTable() = default;
};
/// Log of JobTableData keyed by JobID. Uses the JOB key prefix and publishes
/// on JOB_PUBSUB.
class JobTable : public Log<JobID, JobTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  JobTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
           RedisGcsClient *client)
      : Log<JobID, JobTableData>(contexts, client) {
    prefix_ = TablePrefix::JOB;
    pubsub_channel_ = TablePubsub::JOB_PUBSUB;
  }

  virtual ~JobTable() = default;
};
/// Log-based Actor table. The log starts with an ALIVE entry, which represents
/// the first time the actor is created. This may be followed by 0 or more pairs
/// of RESTARTING, ALIVE entries, which represent each time the actor fails
/// (RESTARTING) and gets recreated (ALIVE). These may be followed by a DEAD
/// entry, which means that the actor has failed and will not be reconstructed.
///
/// Uses the ACTOR key prefix and publishes on ACTOR_PUBSUB.
class LogBasedActorTable : public Log<ActorID, ActorTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  LogBasedActorTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                     RedisGcsClient *client)
      : Log<ActorID, ActorTableData>(contexts, client) {
    prefix_ = TablePrefix::ACTOR;
    pubsub_channel_ = TablePubsub::ACTOR_PUBSUB;
  }

  /// Get all actor id synchronously.
  ///
  /// \return The actor IDs.
  std::vector<ActorID> GetAllActorID();

  /// Get actor table data by actor id synchronously.
  ///
  /// \param actor_id The ID of the actor to fetch.
  /// \param actor_table_data Output location for the actor's data.
  /// \return Status
  Status Get(const ActorID &actor_id, ActorTableData *actor_table_data);
};
/// Actor table.
/// This table is only used for GCS-based actor management. And when completely migrate to
/// GCS service, the log-based actor table could be removed.
///
/// Uses the ACTOR key prefix and publishes on ACTOR_PUBSUB.
class ActorTable : public Table<ActorID, ActorTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  ActorTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
             RedisGcsClient *client)
      : Table<ActorID, ActorTableData>(contexts, client) {
    prefix_ = TablePrefix::ACTOR;
    pubsub_channel_ = TablePubsub::ACTOR_PUBSUB;
  }

  /// Get all actor id synchronously.
  ///
  /// \return The actor IDs.
  std::vector<ActorID> GetAllActorID();

  /// Get actor table data by actor id synchronously.
  ///
  /// \param actor_id The ID of the actor to fetch.
  /// \param actor_table_data Output location for the actor's data.
  /// \return Status
  Status Get(const ActorID &actor_id, ActorTableData *actor_table_data);
};
/// Table of WorkerTableData keyed by WorkerID. Uses the WORKERS key prefix
/// and publishes on WORKER_FAILURE_PUBSUB.
class WorkerTable : public Table<WorkerID, WorkerTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  WorkerTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
              RedisGcsClient *client)
      : Table<WorkerID, WorkerTableData>(contexts, client) {
    prefix_ = TablePrefix::WORKERS;
    pubsub_channel_ = TablePubsub::WORKER_FAILURE_PUBSUB;
  }

  virtual ~WorkerTable() = default;
};
/// Log of TaskReconstructionData keyed by TaskID. Uses the
/// TASK_RECONSTRUCTION key prefix; the constructor does not set a pubsub
/// channel.
class TaskReconstructionLog : public Log<TaskID, TaskReconstructionData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  TaskReconstructionLog(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                        RedisGcsClient *client)
      : Log<TaskID, TaskReconstructionData>(contexts, client) {
    prefix_ = TablePrefix::TASK_RECONSTRUCTION;
  }
};
/// Table of TaskLeaseData keyed by TaskID. Uses the TASK_LEASE key prefix and
/// publishes on TASK_LEASE_PUBSUB. Entries are given a Redis expiration when
/// they are added.
class TaskLeaseTable : public Table<TaskID, TaskLeaseData> {
 public:
  /// Use boost::optional to represent subscription results, so that we can
  /// notify raylet whether the entry of task lease is empty.
  using Callback =
      std::function<void(RedisGcsClient *client, const TaskID &task_id,
                         const std::vector<boost::optional<TaskLeaseData>> &data)>;

  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  TaskLeaseTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
                 RedisGcsClient *client)
      : Table<TaskID, TaskLeaseData>(contexts, client) {
    prefix_ = TablePrefix::TASK_LEASE;
    pubsub_channel_ = TablePubsub::TASK_LEASE_PUBSUB;
  }

  /// Add a lease entry, then ask Redis to expire its key.
  ///
  /// \param job_id The ID of the job.
  /// \param id The ID of the task whose lease is added.
  /// \param data The lease data; its timeout() is sent as the PEXPIRE argument.
  /// \param done Callback forwarded to Table::Add.
  /// \return The failing Status of Table::Add if it fails, otherwise the
  /// Status of issuing the PEXPIRE command.
  Status Add(const JobID &job_id, const TaskID &id,
             const std::shared_ptr<TaskLeaseData> &data,
             const WriteCallback &done) override {
    using Base = Table<TaskID, TaskLeaseData>;
    RAY_RETURN_NOT_OK(Base::Add(job_id, id, data, done));
    // Mark the entry for expiration in Redis. It's okay if this command fails
    // since the lease entry itself contains the expiration period. In the
    // worst case, if the command fails, then a client that looks up the lease
    // entry will overestimate the expiration time.
    // TODO(swang): Use a common helper function to format the key instead of
    // hardcoding it to match the Redis module.
    const std::string lease_key = TablePrefix_Name(prefix_) + id.Binary();
    const std::string expire_arg = std::to_string(data->timeout());
    std::vector<std::string> args = {"PEXPIRE", lease_key, expire_arg};
    return GetRedisContext(id)->RunArgvAsync(args);
  }

  /// Implement this method for the subscription tools class SubscriptionExecutor.
  /// In this way TaskLeaseTable() can also reuse class SubscriptionExecutor.
  ///
  /// \param job_id The ID of the job.
  /// \param node_id The node the subscription is made for.
  /// \param subscribe Callback that receives the subscription results.
  /// \param done Callback that is called when subscription is complete.
  /// \return Status
  Status Subscribe(const JobID &job_id, const NodeID &node_id, const Callback &subscribe,
                   const SubscriptionCallback &done);
};
namespace raylet {

/// Table of TaskTableData keyed by TaskID. Uses the RAYLET_TASK key prefix
/// and publishes on RAYLET_TASK_PUBSUB.
class TaskTable : public Table<TaskID, TaskTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Table.
  /// \param client The GCS client, forwarded to Table.
  TaskTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
            RedisGcsClient *client)
      : Table<TaskID, TaskTableData>(contexts, client) {
    prefix_ = TablePrefix::RAYLET_TASK;
    pubsub_channel_ = TablePubsub::RAYLET_TASK_PUBSUB;
  }

  /// Same as the two-argument constructor, and additionally selects the
  /// command type used by this table.
  ///
  /// \param contexts The Redis shard contexts.
  /// \param client The GCS client.
  /// \param command_type The value stored into command_type_.
  TaskTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
            RedisGcsClient *client, gcs::CommandType command_type)
      : TaskTable(contexts, client) {
    command_type_ = command_type;
  }
};

}  // namespace raylet
/// Log of ProfileTableData keyed by UniqueID. Uses the PROFILE key prefix;
/// the constructor does not set a pubsub channel.
class ProfileTable : public Log<UniqueID, ProfileTableData> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  ProfileTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
               RedisGcsClient *client)
      : Log<UniqueID, ProfileTableData>(contexts, client) {
    prefix_ = TablePrefix::PROFILE;
  }

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;
};
/// \class NodeTable
///
/// The NodeTable stores information about active and inactive nodes. It is
/// structured as a single log stored at a key known to all nodes. When a
/// node connects, it appends an entry to the log indicating that it is
/// alive. When a node disconnects, or if another node detects its failure,
/// it should append an entry to the log indicating that it is dead. A node
/// that is marked as dead should never again be marked as alive; if it needs
/// to reconnect, it must connect with a different NodeID.
///
/// Uses the NODE key prefix and publishes on NODE_PUBSUB.
class NodeTable : public Log<NodeID, GcsNodeInfo> {
 public:
  /// \param contexts The Redis shard contexts, forwarded to Log.
  /// \param client The GCS client, forwarded to Log.
  NodeTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
            RedisGcsClient *client)
      : Log<NodeID, GcsNodeInfo>(contexts, client) {
    prefix_ = TablePrefix::NODE;
    pubsub_channel_ = TablePubsub::NODE_PUBSUB;
  }

  /// Connect as a NODE to the GCS. This registers us in the NODE table
  /// and begins subscription to NODE table notifications.
  ///
  /// \param local_node_info Information about the connecting NODE. This must have the
  /// same id as the one set in the NODE table.
  /// \return Status
  ray::Status Connect(const GcsNodeInfo &local_node_info);

  /// Disconnect the NODE from the GCS. The NODE ID assigned during
  /// registration should never be reused after disconnecting.
  ///
  /// \return Status
  ray::Status Disconnect();

  /// Mark a new node as connected to GCS asynchronously.
  ///
  /// \param node_info Information about the node.
  /// \param done Callback that is called once the node has been marked to connected.
  /// \return Status
  ray::Status MarkConnected(const GcsNodeInfo &node_info, const WriteCallback &done);

  /// Mark a different node as disconnected. The NODE ID should never be
  /// reused for a new node.
  ///
  /// \param dead_node_id The ID of the node to mark as dead.
  /// \param done Callback that is called once the node has been marked to
  /// disconnected.
  /// \return Status
  ray::Status MarkDisconnected(const NodeID &dead_node_id, const WriteCallback &done);

  /// Subscribe to node changes.
  ///
  /// \param subscribe Callback that receives a NodeID and its GcsNodeInfo.
  /// \param done Status callback for the subscription request.
  /// \return Status
  ray::Status SubscribeToNodeChange(
      const SubscribeCallback<NodeID, GcsNodeInfo> &subscribe,
      const StatusCallback &done);

  /// Get a node's information from the cache. The cache only contains
  /// information for nodes that we've heard a notification for.
  ///
  /// \param node The node to get information about.
  /// \param node_info The node information will be copied here if
  /// we have the node in the cache.
  /// \return Whether the node is in the cache.
  bool GetNode(const NodeID &node, GcsNodeInfo *node_info) const;

  /// Get the local node's ID.
  ///
  /// \return The local node's ID.
  const NodeID &GetLocalNodeId() const;

  /// Get the local node's information.
  ///
  /// \return The local node's information.
  const GcsNodeInfo &GetLocalNode() const;

  /// Check whether the given node is removed.
  ///
  /// \param node_id The ID of the node to check.
  /// \return Whether the node with specified ID is removed.
  bool IsRemoved(const NodeID &node_id) const;

  /// Get the information of all nodes.
  ///
  /// \return The node ID to node information map.
  const std::unordered_map<NodeID, GcsNodeInfo> &GetAllNodes() const;

  /// Lookup the node data in the node table.
  ///
  /// \param lookup Callback that is called after lookup. If the callback is
  /// called with an empty vector, then there was no data at the key.
  /// \return Status.
  Status Lookup(const Callback &lookup);

  /// Returns debug string for class.
  ///
  /// \return string.
  std::string DebugString() const;

  /// The key at which the log of node information is stored. This key must
  /// be kept the same across all instances of the NodeTable, so that all
  /// nodes append and read from the same key.
  NodeID node_log_key_;

 private:
  /// Signature of the callback registered via RegisterNodeChangeCallback.
  using NodeChangeCallback =
      std::function<void(const NodeID &id, const GcsNodeInfo &node_info)>;

  /// Register a callback to call when a new node is added or a node is removed.
  ///
  /// \param callback The callback to register.
  void RegisterNodeChangeCallback(const NodeChangeCallback &callback);

  /// Handle a node table notification.
  ///
  /// \param client The GCS client.
  /// \param node_info The node information carried by the notification.
  void HandleNotification(RedisGcsClient *client, const GcsNodeInfo &node_info);

  /// Whether this node has called Disconnect().
  bool disconnected_ = false;
  /// This node's ID. It will be initialized when we call method `Connect(...)`.
  NodeID local_node_id_;
  /// Information about this node.
  GcsNodeInfo local_node_info_;
  /// This ID is used in method `SubscribeToNodeChange(...)` to Subscribe and
  /// RequestNotification.
  /// The reason for not using `local_node_id_` is because it is only initialized
  /// for registered nodes.
  NodeID subscribe_id_{NodeID::FromRandom()};
  /// The callback to call when a new node is added or a node is removed.
  NodeChangeCallback node_change_callback_ = nullptr;
  /// A cache for information about all nodes.
  std::unordered_map<NodeID, GcsNodeInfo> node_cache_;
  /// The set of removed nodes.
  std::unordered_set<NodeID> removed_nodes_;
};
} // namespace gcs
} // namespace ray
-95
View File
@@ -1,95 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <atomic>
#include <chrono>
#include <string>
#include <thread>
#include <vector>
#include "gtest/gtest.h"
#include "ray/common/test_util.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_gcs_client.h"
namespace ray {
namespace gcs {
/// \class AccessorTestBase
///
/// Shared gtest fixture for the Redis accessor tests. The constructor and
/// destructor start and shut down the Redis servers; SetUp()/TearDown()
/// connect and disconnect a RedisGcsClient whose io_service runs on a
/// dedicated thread. Derived fixtures supply the test data via GenTestData().
template <typename ID, typename Data>
class AccessorTestBase : public ::testing::Test {
 public:
  AccessorTestBase() { TestSetupUtil::StartUpRedisServers(std::vector<int>()); }

  ~AccessorTestBase() override { TestSetupUtil::ShutDownRedisServers(); }

  /// Generate the test data, connect the client and start the io thread.
  /// Marked `override` so a signature mismatch with ::testing::Test is a
  /// compile error instead of a fixture hook that silently never runs.
  void SetUp() override {
    GenTestData();

    GcsClientOptions options =
        GcsClientOptions("127.0.0.1", TEST_REDIS_SERVER_PORTS.front(), "", true);
    gcs_client_.reset(new RedisGcsClient(options));
    RAY_CHECK_OK(gcs_client_->Connect(io_service_));

    work_thread_.reset(new std::thread([this] {
      // The work guard keeps run() from returning while no handlers are
      // queued; it only has to live as long as this call, so it sits on the
      // thread's stack rather than on the heap.
      boost::asio::io_service::work keep_alive(io_service_);
      io_service_.run();
    }));
  }

  /// Disconnect the client, stop and join the io thread, then drop the data.
  void TearDown() override {
    gcs_client_->Disconnect();

    io_service_.stop();
    work_thread_->join();
    work_thread_.reset();

    gcs_client_.reset();

    ClearTestData();
  }

 protected:
  /// Populate the fixture's test data; called at the start of SetUp().
  virtual void GenTestData() = 0;

  /// Drop everything stored in id_to_data_.
  void ClearTestData() { id_to_data_.clear(); }

  /// Wait until pending_count_ reaches zero.
  ///
  /// \param timeout How long to wait before the expectation fails.
  void WaitPendingDone(std::chrono::milliseconds timeout) {
    WaitPendingDone(pending_count_, timeout);
  }

  /// Wait until the given counter reaches zero; records a test failure
  /// (EXPECT_TRUE) if WaitForCondition reports that it did not.
  ///
  /// \param pending_count The counter to watch.
  /// \param timeout How long to wait before the expectation fails.
  void WaitPendingDone(std::atomic<int> &pending_count,
                       std::chrono::milliseconds timeout) {
    auto condition = [&pending_count]() { return pending_count == 0; };
    EXPECT_TRUE(WaitForCondition(condition, timeout.count()));
  }

 protected:
  /// The client under test; created in SetUp() and released in TearDown().
  std::unique_ptr<RedisGcsClient> gcs_client_;

  /// The io_service the client is connected with, run by work_thread_.
  boost::asio::io_service io_service_;
  std::unique_ptr<std::thread> work_thread_;

  /// Test data keyed by ID; filled by GenTestData() of the derived fixture.
  std::unordered_map<ID, std::shared_ptr<Data>> id_to_data_;

  /// Default counter watched by WaitPendingDone(timeout).
  std::atomic<int> pending_count_{0};
  /// Default timeout passed to WaitPendingDone by the tests.
  std::chrono::milliseconds wait_pending_timeout_{10000};
};
} // namespace gcs
} // namespace ray
@@ -1,82 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <atomic>
#include <chrono>
#include <string>
#include <thread>
#include <vector>
#include "gtest/gtest.h"
#include "ray/common/test_util.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/test/accessor_test_base.h"
namespace ray {
namespace gcs {
// Fixture for the actor accessor tests: fills id_to_data_ with 100 ALIVE
// actors, one per job.
class ActorInfoAccessorTest : public AccessorTestBase<ActorID, ActorTableData> {
 protected:
  // Creates 100 actors; actor number n belongs to job n and uses n as its
  // parent task counter.
  virtual void GenTestData() {
    for (size_t i = 0; i < 100; ++i) {
      auto actor_data = std::make_shared<ActorTableData>();
      actor_data->set_max_restarts(1);
      actor_data->set_num_restarts(0);

      JobID owner_job = JobID::FromInt(i);
      actor_data->set_job_id(owner_job.Binary());
      actor_data->set_state(ActorTableData::ALIVE);

      ActorID new_actor_id =
          ActorID::Of(owner_job, RandomTaskId(), /*parent_task_counter=*/i);
      actor_data->set_actor_id(new_actor_id.Binary());

      id_to_data_[new_actor_id] = actor_data;
    }
  }

  size_t checkpoint_number_{2};
};
// Subscribes to all actor updates and waits for the subscribe request itself
// to complete.
TEST_F(ActorInfoAccessorTest, Subscribe) {
  ActorInfoAccessor &actors = gcs_client_->Actors();

  // sub_pending_count is decremented per delivered update,
  // do_sub_pending_count tracks the outstanding subscribe request.
  std::atomic<int> sub_pending_count(0);
  std::atomic<int> do_sub_pending_count(0);

  // Any delivered update must refer to an actor created by GenTestData().
  auto on_actor_update = [this, &sub_pending_count](const ActorID &actor_id,
                                                    const ActorTableData &data) {
    const auto it = id_to_data_.find(actor_id);
    ASSERT_TRUE(it != id_to_data_.end());
    --sub_pending_count;
  };
  auto on_subscribe_done = [&do_sub_pending_count](Status status) {
    RAY_CHECK_OK(status);
    --do_sub_pending_count;
  };

  ++do_sub_pending_count;
  RAY_CHECK_OK(actors.AsyncSubscribeAll(on_actor_update, on_subscribe_done));

  // Wait until subscribe finishes.
  WaitPendingDone(do_sub_pending_count, wait_pending_timeout_);
}
} // namespace gcs
} // namespace ray
// Test entry point. After gtest has processed its own flags, exactly three
// arguments must remain: the redis server path, the redis client path and
// the redis module library path, in that order.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  RAY_CHECK(argc == 4);

  char **paths = argv + 1;
  ray::TEST_REDIS_SERVER_EXEC_PATH = paths[0];
  ray::TEST_REDIS_CLIENT_EXEC_PATH = paths[1];
  ray::TEST_REDIS_MODULE_LIBRARY_PATH = paths[2];

  return RUN_ALL_TESTS();
}
File diff suppressed because it is too large Load Diff
@@ -1,99 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "gtest/gtest.h"
#include "ray/common/test_util.h"
#include "ray/gcs/pb_util.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/test/accessor_test_base.h"
namespace ray {
namespace gcs {
// Fixture for the job accessor tests: fills id_to_data_ with
// total_job_number_ jobs that are not dead.
class RedisJobInfoAccessorTest : public AccessorTestBase<JobID, JobTableData> {
 protected:
  // Creates one JobTableData per job index; the index is also used as the
  // driver pid.
  virtual void GenTestData() {
    for (size_t i = 0; i < total_job_number_; ++i) {
      JobID new_job_id = JobID::FromInt(i);
      std::shared_ptr<JobTableData> job_entry =
          CreateJobTableData(new_job_id, /*is_dead*/ false, /*timestamp*/ 1,
                             /*driver_ip_address*/ "", /*driver_pid*/ i);
      id_to_data_[new_job_id] = job_entry;
    }
  }

  // Number of "job is dead" notifications the test is still waiting for.
  std::atomic<int> subscribe_pending_count_{0};
  // Number of jobs generated by GenTestData().
  size_t total_job_number_{100};
};
// Subscribes to all job updates, adds every generated job, then marks every
// job finished and waits for the matching "dead" notifications.
TEST_F(RedisJobInfoAccessorTest, AddAndSubscribe) {
  JobInfoAccessor &jobs = gcs_client_->Jobs();

  // Every notification must refer to a generated job; only notifications for
  // dead jobs count against subscribe_pending_count_.
  auto on_job_update = [this](const JobID &job_id, const JobTableData &data) {
    const auto it = id_to_data_.find(job_id);
    RAY_CHECK(it != id_to_data_.end());
    if (data.is_dead()) {
      --subscribe_pending_count_;
    }
  };
  // Completion handler shared by all asynchronous requests below.
  auto on_done = [this](Status status) {
    RAY_CHECK_OK(status);
    --pending_count_;
  };
  // Waits for both the request counter and the notification counter.
  auto wait_for_all = [this] {
    WaitPendingDone(wait_pending_timeout_);
    WaitPendingDone(subscribe_pending_count_, wait_pending_timeout_);
  };

  // SubscribeAll
  ++pending_count_;
  RAY_CHECK_OK(jobs.AsyncSubscribeAll(on_job_update, on_done));
  wait_for_all();

  // Register
  for (const auto &entry : id_to_data_) {
    ++pending_count_;
    RAY_CHECK_OK(jobs.AsyncAdd(entry.second, on_done));
  }
  wait_for_all();

  // Update
  for (auto &entry : id_to_data_) {
    ++pending_count_;
    ++subscribe_pending_count_;
    RAY_CHECK_OK(jobs.AsyncMarkFinished(entry.first, on_done));
  }
  wait_for_all();
}
} // namespace gcs
} // namespace ray
/// Test binary entry point. Once googletest has been initialised with the
/// command line, exactly three positional arguments must be left; they are
/// stored, in order, in TEST_REDIS_SERVER_EXEC_PATH,
/// TEST_REDIS_CLIENT_EXEC_PATH and TEST_REDIS_MODULE_LIBRARY_PATH.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  RAY_CHECK(argc == 4);

  // Skip argv[0]; the remaining entries are the three positional arguments.
  char **const positional = argv + 1;
  ray::TEST_REDIS_SERVER_EXEC_PATH = positional[0];
  ray::TEST_REDIS_CLIENT_EXEC_PATH = positional[1];
  ray::TEST_REDIS_MODULE_LIBRARY_PATH = positional[2];

  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
@@ -1,181 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include "gtest/gtest.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/test/accessor_test_base.h"
namespace ray {
namespace gcs {
/// Fixture for the node resource accessor tests. It generates a resource map
/// per node and remembers which resource names the tests should delete.
class NodeDynamicResourceTest : public AccessorTestBase<NodeID, ResourceTableData> {
 protected:
  using ResourceMap = NodeResourceInfoAccessor::ResourceMap;

  /// Creates `node_number_` random node ids, each with
  /// `resource_type_number_` resources. Resource `k` is named
  /// `std::to_string(k)` and has capacity `k`. The very first resource name
  /// produced is also recorded in `resource_to_delete_`.
  virtual void GenTestData() {
    for (size_t node = 0; node < node_number_; ++node) {
      const NodeID node_id = NodeID::FromRandom();
      ResourceMap resources;
      for (size_t resource = 0; resource < resource_type_number_; ++resource) {
        std::shared_ptr<ResourceTableData> entry = std::make_shared<ResourceTableData>();
        entry->set_resource_capacity(resource);
        const std::string name = std::to_string(resource);
        resources[name] = entry;
        // Only populated while still empty, so it ends up holding one name.
        if (resource_to_delete_.empty()) {
          resource_to_delete_.emplace_back(name);
        }
      }
      id_to_resource_map_[node_id] = std::move(resources);
    }
  }

  /// Generated resources, keyed by node id.
  std::unordered_map<NodeID, ResourceMap> id_to_resource_map_;
  /// Number of nodes GenTestData creates.
  size_t node_number_{100};
  /// Number of resources GenTestData creates per node.
  size_t resource_type_number_{5};
  /// Resource names passed to AsyncDeleteResources by the tests.
  std::vector<std::string> resource_to_delete_;
  /// Subscription notifications the Subscribe test is still waiting for.
  std::atomic<int> sub_pending_count_{0};
  std::atomic<int> do_sub_pending_count_{0};
};
/// Writes every generated node's resource map and, from inside the update
/// callback, reads it back and checks that the number of resources matches
/// what was written.
TEST_F(NodeDynamicResourceTest, UpdateAndGet) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    // One pending slot per node; it is released by the get callback below.
    ++pending_count_;
    const NodeID &id = node_rs.first;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second,
        [this, &node_resource_accessor, id](Status status) {
          RAY_CHECK_OK(status);
          auto get_callback = [this, id](Status status,
                                         const boost::optional<ResourceMap> &result) {
            --pending_count_;
            RAY_CHECK_OK(status);
            const auto it = id_to_resource_map_.find(id);
            // Guard the dereferences below: the id must be one we generated
            // and the reply must actually carry a resource map.
            ASSERT_TRUE(it != id_to_resource_map_.end());
            ASSERT_TRUE(result);
            ASSERT_EQ(it->second.size(), result->size());
          };
          // Get
          status = node_resource_accessor.AsyncGetResources(id, get_callback);
          RAY_CHECK_OK(status);
        });
    // If the request could not even be issued, its callback never runs and
    // the pending slot would never be released; fail right away instead.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
}
/// Writes every generated node's resource map, then deletes
/// `resource_to_delete_` from each node and checks, via a follow-up get, that
/// the node is left with correspondingly fewer resources.
TEST_F(NodeDynamicResourceTest, Delete) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    // A request that fails to be issued never invokes its callback, so the
    // pending slot would never be released; fail right away instead.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
  for (const auto &node_rs : id_to_resource_map_) {
    // One pending slot per node; it is released by the get callback below.
    ++pending_count_;
    const NodeID &id = node_rs.first;
    // Delete
    Status status = node_resource_accessor.AsyncDeleteResources(
        id, resource_to_delete_, [this, &node_resource_accessor, id](Status status) {
          RAY_CHECK_OK(status);
          // Get
          status = node_resource_accessor.AsyncGetResources(
              id, [this, id](Status status, const boost::optional<ResourceMap> &result) {
                --pending_count_;
                RAY_CHECK_OK(status);
                const auto it = id_to_resource_map_.find(id);
                // Guard the dereferences below.
                ASSERT_TRUE(it != id_to_resource_map_.end());
                ASSERT_TRUE(result);
                ASSERT_EQ(it->second.size() - resource_to_delete_.size(), result->size());
              });
          RAY_CHECK_OK(status);
        });
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
}
/// Writes every generated node's resource map, subscribes to resource
/// changes, then deletes `resource_to_delete_` from each node and waits for
/// the resulting notifications.
TEST_F(NodeDynamicResourceTest, Subscribe) {
  NodeResourceInfoAccessor &node_resource_accessor = gcs_client_->NodeResources();
  for (const auto &node_rs : id_to_resource_map_) {
    ++pending_count_;
    // Update
    Status status = node_resource_accessor.AsyncUpdateResources(
        node_rs.first, node_rs.second, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    // Checked like the delete requests further down: a request that fails to
    // be issued never invokes its callback, so the wait below could not
    // complete.
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
  auto subscribe = [this](const rpc::NodeResourceChange &notification) {
    auto id = NodeID::FromBinary(notification.node_id());
    RAY_LOG(INFO) << "receive client id=" << id;
    auto it = id_to_resource_map_.find(id);
    ASSERT_TRUE(it != id_to_resource_map_.end());
    // A notification without deletions must describe the node's whole
    // resource map; otherwise it must list exactly the deleted resources.
    if (0 == notification.deleted_resources_size()) {
      ASSERT_EQ(notification.updated_resources_size(), it->second.size());
    } else {
      ASSERT_EQ(notification.deleted_resources_size(), resource_to_delete_.size());
    }
    --sub_pending_count_;
  };
  auto done = [this](Status status) {
    RAY_CHECK_OK(status);
    --pending_count_;
  };
  // Subscribe
  ++pending_count_;
  Status status = node_resource_accessor.AsyncSubscribeToResources(subscribe, done);
  RAY_CHECK_OK(status);
  for (const auto &node_rs : id_to_resource_map_) {
    // Delete
    // One request completion and one notification are expected per node.
    ++pending_count_;
    ++sub_pending_count_;
    Status status = node_resource_accessor.AsyncDeleteResources(
        node_rs.first, resource_to_delete_, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        });
    RAY_CHECK_OK(status);
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count_, wait_pending_timeout_);
}
} // namespace gcs
} // namespace ray
/// Test binary entry point. Requires exactly three positional arguments once
/// googletest has been initialised with the command line. argv[1], argv[2]
/// and argv[3] are stored in TEST_REDIS_SERVER_EXEC_PATH,
/// TEST_REDIS_CLIENT_EXEC_PATH and TEST_REDIS_MODULE_LIBRARY_PATH.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  RAY_CHECK(argc == 4);

  const char *const server_exec = argv[1];
  const char *const client_exec = argv[2];
  const char *const module_library = argv[3];
  ray::TEST_REDIS_SERVER_EXEC_PATH = server_exec;
  ray::TEST_REDIS_CLIENT_EXEC_PATH = client_exec;
  ray::TEST_REDIS_MODULE_LIBRARY_PATH = module_library;

  return RUN_ALL_TESTS();
}
@@ -1,160 +0,0 @@
// Copyright 2017 The Ray Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <unordered_map>
#include <vector>
#include "gtest/gtest.h"
#include "ray/common/test_util.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/test/accessor_test_base.h"
namespace ray {
namespace gcs {
/// Fixture for the object info accessor test. It generates a set of random
/// object ids, each with several location records.
class RedisObjectInfoAccessorTest : public AccessorTestBase<ObjectID, ObjectTableData> {
 protected:
  using ObjectVector = std::vector<std::shared_ptr<ObjectTableData>>;

  /// Creates `object_count_` random object ids. Each one gets `copy_count_`
  /// location records whose manager field is the binary form of a fresh
  /// random node id.
  void GenTestData() {
    for (size_t object_index = 0; object_index < object_count_; ++object_index) {
      ObjectVector locations;
      for (size_t copy_index = 0; copy_index < copy_count_; ++copy_index) {
        std::shared_ptr<ObjectTableData> location = std::make_shared<ObjectTableData>();
        const NodeID holder = NodeID::FromRandom();
        location->set_manager(holder.Binary());
        locations.emplace_back(std::move(location));
      }
      const ObjectID object_id = ObjectID::FromRandom();
      object_id_to_data_[object_id] = locations;
    }
  }

  /// Generated location records, keyed by object id.
  std::unordered_map<ObjectID, ObjectVector> object_id_to_data_;
  /// Number of objects GenTestData creates.
  size_t object_count_{100};
  /// Number of location records GenTestData creates per object.
  size_t copy_count_{5};
};
/// Exercises object locations in two stages.
/// 1. Add every generated location, then read the locations back and check
///    the count per object.
/// 2. Subscribe to every object, remove one location per object, and check
///    both the notifications and the reduced location count.
TEST_F(RedisObjectInfoAccessorTest, TestGetAddRemove) {
  ObjectInfoAccessor &object_accessor = gcs_client_->Objects();
  // add && get
  // add
  for (const auto &elem : object_id_to_data_) {
    for (const auto &item : elem.second) {
      ++pending_count_;
      NodeID node_id = NodeID::FromBinary(item->manager());
      RAY_CHECK_OK(
          object_accessor.AsyncAddLocation(elem.first, node_id, [this](Status status) {
            RAY_CHECK_OK(status);
            --pending_count_;
          }));
    }
  }
  WaitPendingDone(wait_pending_timeout_);
  // get
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    size_t total_size = elem.second.size();
    RAY_CHECK_OK(object_accessor.AsyncGetLocations(
        elem.first,
        [this, total_size](Status status,
                           const boost::optional<rpc::ObjectLocationInfo> &result) {
          RAY_CHECK_OK(status);
          // The optional must be engaged before it is dereferenced.
          ASSERT_TRUE(result);
          ASSERT_EQ(total_size, result->locations().size());
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);
  RAY_LOG(INFO) << "Case Add && Get done.";
  // subscribe && delete
  // subscribe
  std::atomic<int> sub_pending_count(0);
  // Number of the notification currently being handled, starting at 1. It is
  // a local captured by reference rather than a function-static so that it
  // starts over whenever the test body runs again in the same process.
  size_t response_count = 1;
  auto subscribe = [this, &sub_pending_count, &response_count](
                       const ObjectID &object_id,
                       const std::vector<rpc::ObjectLocationChange> &result) {
    const auto it = object_id_to_data_.find(object_id);
    ASSERT_TRUE(it != object_id_to_data_.end());
    // The first `object_count_` notifications are expected to carry all
    // `copy_count_` locations as additions; every later one is expected to
    // carry a single removal.
    size_t cur_count = response_count <= object_count_ ? copy_count_ : 1;
    ASSERT_EQ(result.size(), cur_count);
    bool change_mode = response_count <= object_count_;
    for (const auto &res : result) {
      ASSERT_EQ(change_mode, res.is_add());
    }
    ++response_count;
    --sub_pending_count;
  };
  for (const auto &elem : object_id_to_data_) {
    // One request completion and one notification are expected per object.
    ++pending_count_;
    ++sub_pending_count;
    RAY_CHECK_OK(object_accessor.AsyncSubscribeToLocations(elem.first, subscribe,
                                                           [this](Status status) {
                                                             RAY_CHECK_OK(status);
                                                             --pending_count_;
                                                           }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count, wait_pending_timeout_);
  // delete
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    ++sub_pending_count;
    const ObjectVector &object_vec = elem.second;
    // Only the first location of every object is removed.
    NodeID node_id = NodeID::FromBinary(object_vec[0]->manager());
    RAY_CHECK_OK(
        object_accessor.AsyncRemoveLocation(elem.first, node_id, [this](Status status) {
          RAY_CHECK_OK(status);
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);
  WaitPendingDone(sub_pending_count, wait_pending_timeout_);
  // get
  for (const auto &elem : object_id_to_data_) {
    ++pending_count_;
    size_t total_size = elem.second.size();
    RAY_CHECK_OK(object_accessor.AsyncGetLocations(
        elem.first,
        [this, total_size](Status status,
                           const boost::optional<rpc::ObjectLocationInfo> &result) {
          RAY_CHECK_OK(status);
          // The optional must be engaged before it is dereferenced.
          ASSERT_TRUE(result);
          ASSERT_EQ(total_size - 1, result->locations().size());
          --pending_count_;
        }));
  }
  WaitPendingDone(wait_pending_timeout_);
  RAY_LOG(INFO) << "Case Subscribe && Delete done.";
}
} // namespace gcs
} // namespace ray
/// Test binary entry point. Sets up Ray logging for the lifetime of the
/// process, initialises googletest, and then requires exactly three
/// positional arguments. They are stored, in order, in
/// TEST_REDIS_SERVER_EXEC_PATH, TEST_REDIS_CLIENT_EXEC_PATH and
/// TEST_REDIS_MODULE_LIBRARY_PATH.
int main(int argc, char **argv) {
  // Constructed first and destroyed last, so it spans the whole test run.
  InitShutdownRAII ray_log_shutdown_raii(ray::RayLog::StartRayLog,
                                         ray::RayLog::ShutDownRayLog, argv[0],
                                         ray::RayLogLevel::INFO,
                                         /*log_dir=*/"");
  ::testing::InitGoogleTest(&argc, argv);
  RAY_CHECK(argc == 4);

  // Skip argv[0]; the remaining entries are the three positional arguments.
  char **const positional = argv + 1;
  ray::TEST_REDIS_SERVER_EXEC_PATH = positional[0];
  ray::TEST_REDIS_CLIENT_EXEC_PATH = positional[1];
  ray::TEST_REDIS_MODULE_LIBRARY_PATH = positional[2];

  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
+1 -1
View File
@@ -22,7 +22,7 @@
#include "ray/common/id.h"
#include "ray/common/status.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client.h"
#include "ray/object_manager/format/object_manager_generated.h"
namespace ray {
@@ -23,7 +23,7 @@
#include "absl/container/flat_hash_map.h"
#include "ray/common/id.h"
#include "ray/common/status.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client.h"
#include "ray/object_manager/format/object_manager_generated.h"
#include "ray/object_manager/object_directory.h"
#include "ray/rpc/worker/core_worker_client.h"
@@ -20,6 +20,7 @@
#include "gtest/gtest.h"
#include "ray/common/status.h"
#include "ray/common/test_util.h"
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
#include "ray/object_manager/object_manager.h"
#include "ray/util/filesystem.h"
#include "src/ray/protobuf/common.pb.h"
@@ -32,10 +33,24 @@ namespace ray {
using rpc::GcsNodeInfo;
static inline void flushall_redis(void) {
/// Resets the two local Redis instances used by the test.
///
/// The instance on port 6379 is flushed and then told that there is one
/// shard, located at 127.0.0.1:6380. The instance on port 6380 is flushed.
///
/// \return true if both instances could be reached, false as soon as one
/// connection attempt fails (nothing is rolled back in that case).
static inline bool flushall_redis(void) {
  redisContext *context = redisConnect("127.0.0.1", 6379);
  if (context == nullptr || context->err) {
    // A failed connection attempt can still hand back an allocated context;
    // release it so that repeated attempts do not leak.
    if (context != nullptr) {
      redisFree(context);
    }
    return false;
  }
  freeReplyObject(redisCommand(context, "FLUSHALL"));
  freeReplyObject(redisCommand(context, "SET NumRedisShards 1"));
  freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380"));
  redisFree(context);

  redisContext *shard_context = redisConnect("127.0.0.1", 6380);
  if (shard_context == nullptr || shard_context->err) {
    // Same as above: free the context of a failed connection attempt.
    if (shard_context != nullptr) {
      redisFree(shard_context);
    }
    return false;
  }
  freeReplyObject(redisCommand(shard_context, "FLUSHALL"));
  redisFree(shard_context);
  return true;
}
int64_t current_time_ms() {
@@ -71,6 +86,7 @@ class MockServer {
node_info.set_object_manager_port(object_manager_port);
ray::Status status = gcs_client_->Nodes().RegisterSelf(node_info, nullptr);
std::this_thread::sleep_for(std::chrono::milliseconds(5000));
return status;
}
@@ -85,7 +101,7 @@ class MockServer {
class TestObjectManagerBase : public ::testing::Test {
public:
void SetUp() {
flushall_redis();
WaitForCondition(flushall_redis, 7000);
// start store
socket_name_1 = TestSetupUtil::StartObjectStore();
@@ -96,9 +112,10 @@ class TestObjectManagerBase : public ::testing::Test {
int push_timeout_ms = 10000;
// start first server
gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1");
gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "",
/*is_test_client=*/true);
gcs_client_1 = std::make_shared<gcs::RedisGcsClient>(client_options);
/*is_test_client=*/false);
gcs_client_1 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
RAY_CHECK_OK(gcs_client_1->Connect(main_service));
ObjectManagerConfig om_config_1;
om_config_1.store_socket_name = socket_name_1;
@@ -110,7 +127,7 @@ class TestObjectManagerBase : public ::testing::Test {
server1.reset(new MockServer(main_service, om_config_1, gcs_client_1));
// start second server
gcs_client_2 = std::make_shared<gcs::RedisGcsClient>(client_options);
gcs_client_2 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
RAY_CHECK_OK(gcs_client_2->Connect(main_service));
ObjectManagerConfig om_config_2;
om_config_2.store_socket_name = socket_name_2;
@@ -139,6 +156,10 @@ class TestObjectManagerBase : public ::testing::Test {
TestSetupUtil::StopObjectStore(socket_name_1);
TestSetupUtil::StopObjectStore(socket_name_2);
if (!gcs_server_socket_name_.empty()) {
TestSetupUtil::StopGcsServer(gcs_server_socket_name_);
}
}
ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) {
@@ -172,6 +193,7 @@ class TestObjectManagerBase : public ::testing::Test {
std::vector<ObjectID> v1;
std::vector<ObjectID> v2;
std::string gcs_server_socket_name_;
std::string socket_name_1;
std::string socket_name_2;
};
@@ -421,5 +443,6 @@ TEST_F(StressTestObjectManager, StartStressTestObjectManager) {
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
ray::TEST_STORE_EXEC_PATH = std::string(argv[1]);
ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[2]);
return RUN_ALL_TESTS();
}
@@ -20,6 +20,7 @@
#include "gtest/gtest.h"
#include "ray/common/status.h"
#include "ray/common/test_util.h"
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
#include "ray/util/filesystem.h"
#include "src/ray/protobuf/common.pb.h"
@@ -38,6 +39,8 @@ using rpc::GcsNodeInfo;
/// Resets the local Redis instance on port 6379: flushes it and then tells
/// it that there is one shard, located at 127.0.0.1:6380.
///
/// Best effort: nothing is reported to the caller, and the commands are
/// skipped when no usable connection could be established.
static inline void flushall_redis(void) {
  redisContext *context = redisConnect("127.0.0.1", 6379);
  if (context == nullptr) {
    // No context could be allocated; there is nothing to send or to free.
    return;
  }
  // Only talk to the server when the connection attempt succeeded.
  if (!context->err) {
    freeReplyObject(redisCommand(context, "FLUSHALL"));
    freeReplyObject(redisCommand(context, "SET NumRedisShards 1"));
    freeReplyObject(redisCommand(context, "LPUSH RedisShards 127.0.0.1:6380"));
  }
  redisFree(context);
}
@@ -91,9 +94,10 @@ class TestObjectManagerBase : public ::testing::Test {
push_timeout_ms = 1500;
// start first server
gcs_server_socket_name_ = TestSetupUtil::StartGcsServer("127.0.0.1");
gcs::GcsClientOptions client_options("127.0.0.1", 6379, /*password*/ "",
/*is_test_client=*/true);
gcs_client_1 = std::make_shared<gcs::RedisGcsClient>(client_options);
gcs_client_1 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
RAY_CHECK_OK(gcs_client_1->Connect(main_service));
ObjectManagerConfig om_config_1;
om_config_1.store_socket_name = socket_name_1;
@@ -105,7 +109,7 @@ class TestObjectManagerBase : public ::testing::Test {
server1.reset(new MockServer(main_service, om_config_1, gcs_client_1));
// start second server
gcs_client_2 = std::make_shared<gcs::RedisGcsClient>(client_options);
gcs_client_2 = std::make_shared<gcs::ServiceBasedGcsClient>(client_options);
RAY_CHECK_OK(gcs_client_2->Connect(main_service));
ObjectManagerConfig om_config_2;
om_config_2.store_socket_name = socket_name_2;
@@ -134,6 +138,10 @@ class TestObjectManagerBase : public ::testing::Test {
TestSetupUtil::StopObjectStore(socket_name_1);
TestSetupUtil::StopObjectStore(socket_name_2);
if (!gcs_server_socket_name_.empty()) {
TestSetupUtil::StopGcsServer(gcs_server_socket_name_);
}
}
ObjectID WriteDataToClient(plasma::PlasmaClient &client, int64_t data_size) {
@@ -171,6 +179,7 @@ class TestObjectManagerBase : public ::testing::Test {
std::vector<ObjectID> v1;
std::vector<ObjectID> v2;
std::string gcs_server_socket_name_;
std::string socket_name_1;
std::string socket_name_2;
@@ -482,5 +491,6 @@ int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
ray::TEST_STORE_EXEC_PATH = std::string(argv[1]);
wait_timeout_ms = std::stoi(std::string(argv[2]));
ray::TEST_GCS_SERVER_EXEC_PATH = std::string(argv[3]);
return RUN_ALL_TESTS();
}
+3 -3
View File
@@ -306,7 +306,7 @@ ray::Status NodeManager::RegisterGcs() {
// node failure. These workers can be identified by comparing the raylet_id
// in their rpc::Address to the ID of a failed raylet.
const auto &worker_failure_handler =
[this](const WorkerID &id, const gcs::WorkerTableData &worker_failure_data) {
[this](const WorkerID &id, const rpc::WorkerTableData &worker_failure_data) {
HandleUnexpectedWorkerFailure(worker_failure_data.worker_address());
};
RAY_CHECK_OK(gcs_client_->Workers().AsyncSubscribeToWorkerFailures(
@@ -1984,8 +1984,8 @@ void NodeManager::ProcessSetResourceRequest(
RAY_CHECK_OK(gcs_client_->NodeResources().AsyncDeleteResources(
node_id, {resource_name}, nullptr));
} else {
std::unordered_map<std::string, std::shared_ptr<gcs::ResourceTableData>> data_map;
auto resource_table_data = std::make_shared<gcs::ResourceTableData>();
std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>> data_map;
auto resource_table_data = std::make_shared<rpc::ResourceTableData>();
resource_table_data->set_resource_capacity(capacity);
data_map.emplace(resource_name, resource_table_data);
RAY_CHECK_OK(
+2 -2
View File
@@ -139,10 +139,10 @@ ray::Status Raylet::RegisterGcs() {
// Add resource information.
const NodeManagerConfig &node_manager_config = node_manager_.GetInitialConfig();
std::unordered_map<std::string, std::shared_ptr<gcs::ResourceTableData>> resources;
std::unordered_map<std::string, std::shared_ptr<rpc::ResourceTableData>> resources;
for (const auto &resource_pair :
node_manager_config.resource_config.GetResourceMap()) {
auto resource = std::make_shared<gcs::ResourceTableData>();
auto resource = std::make_shared<rpc::ResourceTableData>();
resource->set_resource_capacity(resource_pair.second);
resources.emplace(resource_pair.first, resource);
}
+1 -1
View File
@@ -20,7 +20,7 @@
#include <unordered_set>
#include "ray/common/id.h"
#include "ray/gcs/tables.h"
#include "ray/gcs/gcs_client.h"
#include "ray/object_manager/object_directory.h"
namespace ray {
+11 -8
View File
@@ -21,9 +21,11 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "ray/gcs/callback.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/gcs_client/service_based_accessor.h"
#include "ray/gcs/gcs_client/service_based_gcs_client.h"
#include "ray/object_manager/object_directory.h"
#include "ray/raylet/format/node_manager_generated.h"
#include "ray/raylet/reconstruction_policy.h"
namespace ray {
@@ -97,17 +99,18 @@ class MockObjectDirectory : public ObjectDirectoryInterface {
std::unordered_map<ObjectID, std::unordered_set<NodeID>> locations_;
};
class MockNodeInfoAccessor : public gcs::RedisNodeInfoAccessor {
class MockNodeInfoAccessor : public gcs::ServiceBasedNodeInfoAccessor {
public:
MockNodeInfoAccessor(gcs::RedisGcsClient *client)
: gcs::RedisNodeInfoAccessor(client) {}
MockNodeInfoAccessor(gcs::ServiceBasedGcsClient *client)
: gcs::ServiceBasedNodeInfoAccessor(client) {}
bool IsRemoved(const NodeID &node_id) const override { return false; }
};
class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor {
class MockTaskInfoAccessor : public gcs::ServiceBasedTaskInfoAccessor {
public:
MockTaskInfoAccessor(gcs::RedisGcsClient *client) : RedisTaskInfoAccessor(client) {}
MockTaskInfoAccessor(gcs::ServiceBasedGcsClient *client)
: ServiceBasedTaskInfoAccessor(client) {}
Status AsyncSubscribeTaskLease(
const TaskID &task_id,
@@ -180,9 +183,9 @@ class MockTaskInfoAccessor : public gcs::RedisTaskInfoAccessor {
task_reconstruction_log_;
};
class MockGcs : public gcs::RedisGcsClient {
class MockGcs : public gcs::ServiceBasedGcsClient {
public:
MockGcs() : gcs::RedisGcsClient(gcs::GcsClientOptions("", 0, "")){};
MockGcs() : gcs::ServiceBasedGcsClient(gcs::GcsClientOptions("", 0, "")){};
void Init(gcs::TaskInfoAccessor *task_accessor, gcs::NodeInfoAccessor *node_accessor) {
task_accessor_.reset(task_accessor);
-1
View File
@@ -17,7 +17,6 @@
// clang-format off
#include "ray/common/id.h"
#include "ray/common/task/task.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/object_manager/object_manager.h"
#include "ray/raylet/reconstruction_policy.h"
// clang-format on
@@ -21,8 +21,6 @@
#include "gtest/gtest.h"
#include "ray/common/task/task_util.h"
#include "ray/common/test_util.h"
#include "ray/gcs/redis_accessor.h"
#include "ray/gcs/redis_gcs_client.h"
namespace ray {
+1 -1
View File
@@ -26,7 +26,7 @@
#include "ray/common/client_connection.h"
#include "ray/common/task/task.h"
#include "ray/common/task/task_common.h"
#include "ray/gcs/redis_gcs_client.h"
#include "ray/gcs/gcs_client.h"
#include "ray/raylet/worker.h"
namespace ray {
+6 -2
View File
@@ -25,18 +25,22 @@ fi
REDIS_MODULE="./bazel-bin/libray_redis_module.so"
LOAD_MODULE_ARGS=(--loadmodule "${REDIS_MODULE}")
STORE_EXEC="./bazel-bin/plasma_store_server"
GCS_SERVER_EXEC="./bazel-bin/gcs_server"
# Allow cleanup commands to fail.
bazel run //:redis-cli -- -p 6379 shutdown || true
bazel run //:redis-cli -- -p 6380 shutdown || true
sleep 1s
bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6379 &
bazel run //:redis-server -- --loglevel warning "${LOAD_MODULE_ARGS[@]}" --port 6380 &
sleep 1s
# Run tests.
./bazel-bin/object_manager_stress_test $STORE_EXEC
./bazel-bin/object_manager_stress_test $STORE_EXEC $GCS_SERVER_EXEC
sleep 1s
# Use timeout=1000ms for the Wait tests.
./bazel-bin/object_manager_test $STORE_EXEC 1000
./bazel-bin/object_manager_test $STORE_EXEC 1000 $GCS_SERVER_EXEC
bazel run //:redis-cli -- -p 6379 shutdown
bazel run //:redis-cli -- -p 6380 shutdown
sleep 1s
# Include raylet integration test once it's ready.