Batch heartbeats from node manager together in the monitor. (#3011)

This commit is contained in:
Ujval Misra
2018-11-20 09:52:27 -08:00
committed by Robert Nishihara
parent abdc3b592e
commit b0bfd104f2
13 changed files with 135 additions and 37 deletions
+5
View File
@@ -109,6 +109,7 @@ AsyncGcsClient::AsyncGcsClient(const std::string &address, int port,
client_table_.reset(new ClientTable({primary_context_}, this, client_id));
error_table_.reset(new ErrorTable({primary_context_}, this));
driver_table_.reset(new DriverTable({primary_context_}, this));
heartbeat_batch_table_.reset(new HeartbeatBatchTable({primary_context_}, this));
// Tables below would be sharded.
object_table_.reset(new ObjectTable(shard_contexts_, this, command_type));
actor_table_.reset(new ActorTable(shard_contexts_, this));
@@ -214,6 +215,10 @@ ClassTable &AsyncGcsClient::class_table() { return *class_table_; }
HeartbeatTable &AsyncGcsClient::heartbeat_table() { return *heartbeat_table_; }
/// Accessor for the heartbeat batch table held by this client.
///
/// \return A reference to the HeartbeatBatchTable stored in the
/// heartbeat_batch_table_ member.
HeartbeatBatchTable &AsyncGcsClient::heartbeat_batch_table() {
// Dereference the owning pointer; callers receive a reference, not ownership.
return *heartbeat_batch_table_;
}
ErrorTable &AsyncGcsClient::error_table() { return *error_table_; }
DriverTable &AsyncGcsClient::driver_table() { return *driver_table_; }
+2
View File
@@ -60,6 +60,7 @@ class RAY_EXPORT AsyncGcsClient {
TaskLeaseTable &task_lease_table();
ClientTable &client_table();
HeartbeatTable &heartbeat_table();
HeartbeatBatchTable &heartbeat_batch_table();
ErrorTable &error_table();
DriverTable &driver_table();
ProfileTable &profile_table();
@@ -89,6 +90,7 @@ class RAY_EXPORT AsyncGcsClient {
std::unique_ptr<TaskReconstructionLog> task_reconstruction_log_;
std::unique_ptr<TaskLeaseTable> task_lease_table_;
std::unique_ptr<HeartbeatTable> heartbeat_table_;
std::unique_ptr<HeartbeatBatchTable> heartbeat_batch_table_;
std::unique_ptr<ErrorTable> error_table_;
std::unique_ptr<ProfileTable> profile_table_;
std::unique_ptr<ClientTable> client_table_;
+6
View File
@@ -15,6 +15,7 @@ enum TablePrefix:int {
FUNCTION,
TASK_RECONSTRUCTION,
HEARTBEAT,
HEARTBEAT_BATCH,
ERROR_INFO,
DRIVER,
PROFILE,
@@ -30,6 +31,7 @@ enum TablePubsub:int {
OBJECT,
ACTOR,
HEARTBEAT,
HEARTBEAT_BATCH,
ERROR_INFO,
TASK_LEASE,
DRIVER,
@@ -262,6 +264,10 @@ table HeartbeatTableData {
resource_load_capacity: [double];
}
// A collection of heartbeats stored and published together as one entry.
table HeartbeatBatchTableData {
// The individual heartbeats contained in this batch.
batch: [HeartbeatTableData];
}
// Data for a lease on task execution.
table TaskLeaseData {
// Node manager client ID.
+1
View File
@@ -479,6 +479,7 @@ template class Log<ActorID, ActorTableData>;
template class Log<TaskID, TaskReconstructionData>;
template class Table<TaskID, TaskLeaseData>;
template class Table<ClientID, HeartbeatTableData>;
template class Table<ClientID, HeartbeatBatchTableData>;
template class Log<JobID, ErrorTableData>;
template class Log<UniqueID, ClientTableData>;
template class Log<JobID, DriverTableData>;
+12
View File
@@ -351,6 +351,17 @@ class HeartbeatTable : public Table<ClientID, HeartbeatTableData> {
virtual ~HeartbeatTable() {}
};
/// A GCS table keyed by ClientID whose entries are HeartbeatBatchTableData.
/// It is bound to the HEARTBEAT_BATCH pubsub channel and table prefix.
class HeartbeatBatchTable : public Table<ClientID, HeartbeatBatchTableData> {
public:
/// \param contexts The Redis contexts, forwarded to the Table base class.
/// \param client The GCS client, forwarded to the Table base class.
HeartbeatBatchTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
AsyncGcsClient *client)
: Table(contexts, client) {
// Select the channel and key prefix dedicated to heartbeat batches.
pubsub_channel_ = TablePubsub::HEARTBEAT_BATCH;
prefix_ = TablePrefix::HEARTBEAT_BATCH;
}
virtual ~HeartbeatBatchTable() {}
};
class DriverTable : public Log<JobID, DriverTableData> {
public:
DriverTable(const std::vector<std::shared_ptr<RedisContext>> &contexts,
@@ -359,6 +370,7 @@ class DriverTable : public Log<JobID, DriverTableData> {
pubsub_channel_ = TablePubsub::DRIVER;
prefix_ = TablePrefix::DRIVER;
};
virtual ~DriverTable() {}
/// Appends driver data to the driver table.
+16 -2
View File
@@ -23,14 +23,16 @@ Monitor::Monitor(boost::asio::io_service &io_service, const std::string &redis_a
RAY_CHECK_OK(gcs_client_.Attach(io_service));
}
void Monitor::HandleHeartbeat(const ClientID &client_id) {
void Monitor::HandleHeartbeat(const ClientID &client_id,
const HeartbeatTableDataT &heartbeat_data) {
heartbeats_[client_id] = num_heartbeats_timeout_;
heartbeat_buffer_[client_id] = heartbeat_data;
}
void Monitor::Start() {
const auto heartbeat_callback = [this](gcs::AsyncGcsClient *client, const ClientID &id,
const HeartbeatTableDataT &heartbeat_data) {
HandleHeartbeat(id);
HandleHeartbeat(id, heartbeat_data);
};
RAY_CHECK_OK(gcs_client_.heartbeat_table().Subscribe(
UniqueID::nil(), UniqueID::nil(), heartbeat_callback, nullptr, nullptr));
@@ -66,6 +68,18 @@ void Monitor::Tick() {
}
}
// Send any buffered heartbeats as a single publish.
if (!heartbeat_buffer_.empty()) {
auto batch = std::make_shared<HeartbeatBatchTableDataT>();
for (const auto &heartbeat : heartbeat_buffer_) {
batch->batch.push_back(std::unique_ptr<HeartbeatTableDataT>(
new HeartbeatTableDataT(heartbeat.second)));
}
RAY_CHECK_OK(gcs_client_.heartbeat_batch_table().Add(UniqueID::nil(), UniqueID::nil(),
batch, nullptr));
heartbeat_buffer_.clear();
}
auto heartbeat_period = boost::posix_time::milliseconds(
RayConfig::instance().heartbeat_timeout_milliseconds());
heartbeat_timer_.expires_from_now(heartbeat_period);
+5 -1
View File
@@ -33,7 +33,9 @@ class Monitor {
/// Handle a heartbeat from a Raylet.
///
/// \param client_id The client ID of the Raylet that sent the heartbeat.
void HandleHeartbeat(const ClientID &client_id);
/// \param heartbeat_data The heartbeat sent by the client.
void HandleHeartbeat(const ClientID &client_id,
const HeartbeatTableDataT &heartbeat_data);
private:
/// A client to the GCS, through which heartbeats are received.
@@ -47,6 +49,8 @@ class Monitor {
std::unordered_map<ClientID, int64_t> heartbeats_;
/// The Raylets that have been marked as dead in the client table.
std::unordered_set<ClientID> dead_clients_;
/// A buffer containing heartbeats received from node managers in the last tick.
std::unordered_map<ClientID, HeartbeatTableDataT> heartbeat_buffer_;
};
} // namespace raylet
+23 -15
View File
@@ -156,15 +156,16 @@ ray::Status NodeManager::RegisterGcs() {
};
gcs_client_->client_table().RegisterClientRemovedCallback(node_manager_client_removed);
// Subscribe to node manager heartbeats.
const auto heartbeat_added = [this](gcs::AsyncGcsClient *client, const ClientID &id,
const HeartbeatTableDataT &heartbeat_data) {
HeartbeatAdded(client, id, heartbeat_data);
// Subscribe to heartbeat batches from the monitor.
const auto &heartbeat_batch_added = [this](
gcs::AsyncGcsClient *client, const ClientID &id,
const HeartbeatBatchTableDataT &heartbeat_batch) {
HeartbeatBatchAdded(heartbeat_batch);
};
RAY_RETURN_NOT_OK(gcs_client_->heartbeat_table().Subscribe(
UniqueID::nil(), UniqueID::nil(), heartbeat_added, nullptr,
RAY_RETURN_NOT_OK(gcs_client_->heartbeat_batch_table().Subscribe(
UniqueID::nil(), UniqueID::nil(), heartbeat_batch_added, nullptr,
[](gcs::AsyncGcsClient *client) {
RAY_LOG(DEBUG) << "heartbeat table subscription done callback called.";
RAY_LOG(DEBUG) << "Heartbeat batch table subscription done.";
}));
// Subscribe to driver table updates.
@@ -399,14 +400,9 @@ void NodeManager::ClientRemoved(const ClientTableDataT &client_data) {
remote_server_connections_.erase(client_id);
}
void NodeManager::HeartbeatAdded(gcs::AsyncGcsClient *client, const ClientID &client_id,
void NodeManager::HeartbeatAdded(const ClientID &client_id,
const HeartbeatTableDataT &heartbeat_data) {
RAY_LOG(DEBUG) << "[HeartbeatAdded]: received heartbeat from client id " << client_id;
const ClientID &local_client_id = gcs_client_->client_table().GetLocalClientId();
if (client_id == local_client_id) {
// Skip heartbeats from self.
return;
}
// Locate the client id in remote client table and update available resources based on
// the received heartbeat information.
auto it = cluster_resource_map_.find(client_id);
@@ -427,9 +423,8 @@ void NodeManager::HeartbeatAdded(gcs::AsyncGcsClient *client, const ClientID &cl
remote_resources.SetAvailableResources(std::move(remote_available));
// Extract the load information and save it locally.
remote_resources.SetLoadResources(std::move(remote_load));
auto decision = scheduling_policy_.SpillOver(remote_resources);
// Extract decision for this local scheduler.
auto decision = scheduling_policy_.SpillOver(remote_resources);
std::unordered_set<TaskID> local_task_ids;
for (const auto &task_id : decision) {
// (See design_docs/task_states.rst for the state transition diagram.)
@@ -448,6 +443,19 @@ void NodeManager::HeartbeatAdded(gcs::AsyncGcsClient *client, const ClientID &cl
}
}
void NodeManager::HeartbeatBatchAdded(const HeartbeatBatchTableDataT &heartbeat_batch) {
const ClientID &local_client_id = gcs_client_->client_table().GetLocalClientId();
// Update load information provided by each heartbeat.
for (const auto &heartbeat_data : heartbeat_batch.batch) {
const ClientID &client_id = ClientID::from_binary(heartbeat_data->client_id);
if (client_id == local_client_id) {
// Skip heartbeats from self.
continue;
}
HeartbeatAdded(client_id, *heartbeat_data);
}
}
void NodeManager::HandleActorCreation(const ActorID &actor_id,
const std::vector<ActorTableDataT> &data) {
RAY_LOG(DEBUG) << "Actor creation notification received: " << actor_id;
+5 -3
View File
@@ -127,12 +127,14 @@ class NodeManager {
/// Handler for a heartbeat notification from the GCS.
///
/// \param client The GCS client.
/// \param id The ID of the node manager that sent the heartbeat.
/// \param data The heartbeat data including load information.
/// \return Void.
void HeartbeatAdded(gcs::AsyncGcsClient *client, const ClientID &id,
const HeartbeatTableDataT &data);
void HeartbeatAdded(const ClientID &id, const HeartbeatTableDataT &data);
/// Handler for a heartbeat batch notification from the GCS. Each heartbeat in
/// the batch is passed to HeartbeatAdded, except for heartbeats whose client
/// ID matches the local client ID, which are skipped.
///
/// \param heartbeat_batch The batch of heartbeat data.
/// \return Void.
void HeartbeatBatchAdded(const HeartbeatBatchTableDataT &heartbeat_batch);
/// Methods for task scheduling.
+10 -6
View File
@@ -1,6 +1,8 @@
#include "scheduling_policy.h"
#include <algorithm>
#include <chrono>
#include <random>
#include "scheduling_policy.h"
#include "ray/util/logging.h"
@@ -123,21 +125,23 @@ std::vector<TaskID> SchedulingPolicy::SpillOver(
ResourceSet new_load(remote_scheduling_resources.GetLoadResources());
// Check if we can accommodate an infeasible task.
// Check if we can accommodate infeasible tasks.
for (const auto &task : scheduling_queue_.GetInfeasibleTasks()) {
const auto &spec = task.GetTaskSpecification();
if (spec.GetRequiredPlacementResources().IsSubset(
remote_scheduling_resources.GetTotalResources())) {
const auto &placement_resources = spec.GetRequiredPlacementResources();
if (placement_resources.IsSubset(remote_scheduling_resources.GetTotalResources())) {
decision.push_back(spec.TaskId());
new_load.AddResources(spec.GetRequiredResources());
}
}
// Try to accommodate up to a single ready task.
for (const auto &task : scheduling_queue_.GetReadyTasks()) {
const auto &spec = task.GetTaskSpecification();
if (!spec.IsActorTask()) {
// Make sure the node has enough available resources to prevent forwarding cycles.
if (spec.GetRequiredPlacementResources().IsSubset(
remote_scheduling_resources.GetTotalResources())) {
remote_scheduling_resources.GetAvailableResources())) {
decision.push_back(spec.TaskId());
new_load.AddResources(spec.GetRequiredResources());
break;
+7
View File
@@ -36,6 +36,13 @@ class SchedulingPolicy {
std::unordered_map<ClientID, SchedulingResources> &cluster_resources,
const ClientID &local_client_id);
/// \brief Given the scheduling resources of a remote node, perform a spill-over
/// scheduling operation over the infeasible and ready tasks in the scheduling
/// queue.
///
/// \param remote_scheduling_resources Resource and load information for the
/// remote node being considered as a spill-over target.
/// NOTE(review): the implementation accumulates the resource demand of the
/// selected tasks on top of this node's load; confirm whether that updated
/// load is written back into this (non-const) argument.
/// \return The IDs of the tasks selected to be spilled over to the remote node.
std::vector<TaskID> SpillOver(SchedulingResources &remote_scheduling_resources) const;
/// \brief SchedulingPolicy destructor.
+1 -1
View File
@@ -145,8 +145,8 @@ const std::string ResourceSet::ToString() const {
// Convert the first element to a string.
if (it != resource_capacity_.end()) {
return_string += "{" + it->first + "," + std::to_string(it->second) + "}";
it++;
}
it++;
// Add the remaining elements to the string (along with a comma).
for (; it != resource_capacity_.end(); ++it) {
+42 -9
View File
@@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
import collections
import json
import random
import numpy as np
import os
@@ -919,13 +920,16 @@ def test_actor_multiple_gpus_from_multiple_tasks(shutdown_only):
num_local_schedulers=num_local_schedulers,
redirect_output=True,
num_cpus=(num_local_schedulers * [10 * num_gpus_per_scheduler]),
num_gpus=(num_local_schedulers * [num_gpus_per_scheduler]))
num_gpus=(num_local_schedulers * [num_gpus_per_scheduler]),
_internal_config=json.dumps({
"num_heartbeats_timeout": 1000
}))
@ray.remote
def create_actors(n):
def create_actors(i, n):
@ray.remote(num_gpus=1)
class Actor(object):
def __init__(self):
def __init__(self, i, j):
self.gpu_ids = ray.get_gpu_ids()
def get_location_and_ids(self):
@@ -933,15 +937,44 @@ def test_actor_multiple_gpus_from_multiple_tasks(shutdown_only):
ray.worker.global_worker.plasma_client.store_socket_name),
tuple(self.gpu_ids))
# Create n actors.
for _ in range(n):
Actor.remote()
def sleep(self):
time.sleep(100)
ray.get([
create_actors.remote(num_gpus_per_scheduler)
for _ in range(num_local_schedulers)
# Create n actors.
actors = []
for j in range(n):
actors.append(Actor.remote(i, j))
locations = ray.get(
[actor.get_location_and_ids.remote() for actor in actors])
# Put each actor to sleep for a long time to prevent them from getting
# terminated.
for actor in actors:
actor.sleep.remote()
return locations
all_locations = ray.get([
create_actors.remote(i, num_gpus_per_scheduler)
for i in range(num_local_schedulers)
])
# Make sure that no two actors are assigned to the same GPU.
node_names = {
location
for locations in all_locations for location, gpu_id in locations
}
assert len(node_names) == num_local_schedulers
# Keep track of which GPU IDs are being used for each location.
gpus_in_use = {node_name: [] for node_name in node_names}
for locations in all_locations:
for location, gpu_ids in locations:
gpus_in_use[location].extend(gpu_ids)
for node_name in node_names:
assert len(set(gpus_in_use[node_name])) == num_gpus_per_scheduler
@ray.remote(num_gpus=1)
class Actor(object):
def __init__(self):