Files
ray/src/scheduler.cc
T
2016-06-10 14:12:15 -07:00

777 lines
35 KiB
C++

#include "scheduler.h"
#include <random>
#include <thread>
#include <chrono>
#include "utils.h"
// Builds the scheduler service and stores the scheduling algorithm that
// schedule() later dispatches on.
SchedulerService::SchedulerService(SchedulingAlgorithmType scheduling_algorithm)
    : scheduling_algorithm_(scheduling_algorithm) {
}
// Handles the SubmitTask RPC. Registers one new objref per return value of the
// requested function, stores the task as an operation in the computation
// graph, appends the operation to the task queue and runs a scheduling pass.
// The new objrefs are reported back to the caller through reply.
Status SchedulerService::SubmitTask(ServerContext* context, const SubmitTaskRequest* request, SubmitTaskReply* reply) {
  // The request is const, so we work on our own copy of the task.
  std::unique_ptr<Task> task(new Task(request->task()));
  size_t num_return_vals;
  {
    std::lock_guard<std::mutex> fntable_guard(fntable_lock_);
    if (fntable_.find(task->name()) == fntable_.end()) {
      // TODO(rkn): In the future, this should probably not be fatal. Instead, propagate the error back to the worker.
      RAY_LOG(RAY_FATAL, "The function " << task->name() << " has not been registered by any worker.");
    }
    num_return_vals = fntable_[task->name()].num_return_vals();
  }
  std::vector<ObjRef> result_objrefs;
  for (size_t remaining = num_return_vals; remaining > 0; --remaining) {
    const ObjRef result = register_new_object();
    reply->add_result(result);
    task->add_result(result);
    result_objrefs.push_back(result);
  }
  {
    // increment_ref_count expects its caller to hold reference_counts_lock_.
    std::lock_guard<std::mutex> reference_counts_guard(reference_counts_lock_);
    // First increment: keeps the objrefs alive until we have replied to the
    // worker that called SubmitTask. The matching decrement happens in
    // submit_task in raylib.
    increment_ref_count(result_objrefs);
    // Second increment: keeps the objrefs alive until the task is scheduled on
    // a worker. The matching decrement happens in deserialize_task in raylib.
    increment_ref_count(result_objrefs);
  }
  std::unique_ptr<Operation> operation(new Operation());
  operation->set_allocated_task(task.release());
  // TODO(rkn): Later, this should be the ID of the task that spawned this current task.
  const OperationId creator_operationid = ROOT_OPERATION;
  operation->set_creator_operationid(creator_operationid);
  OperationId operationid;
  {
    std::lock_guard<std::mutex> graph_guard(computation_graph_lock_);
    operationid = computation_graph_.add_operation(std::move(operation));
  }
  {
    std::lock_guard<std::mutex> queue_guard(task_queue_lock_);
    task_queue_.push_back(operationid);
  }
  schedule();
  return Status::OK;
}
// Handles the PushObj RPC: registers a new objref, returns it to the caller
// through reply and runs a scheduling pass.
//
// The previous version also looked up the calling worker's object store via
// get_store(request->workerid()) and discarded the result. That lookup indexes
// workers_ without a bounds check, so it was dropped: it contributed nothing
// and could only misbehave for an unknown workerid.
Status SchedulerService::PushObj(ServerContext* context, const PushObjRequest* request, PushObjReply* reply) {
  const ObjRef objref = register_new_object();
  reply->set_objref(objref);
  schedule();
  return Status::OK;
}
// Handles the RequestObj RPC: checks that the requested objref is known to
// objtable_, queues a (workerid, objref) pull request and runs a scheduling
// pass.
Status SchedulerService::RequestObj(ServerContext* context, const RequestObjRequest* request, AckReply* reply) {
  size_t num_objects;
  {
    std::lock_guard<std::mutex> objtable_guard(objtable_lock_);
    num_objects = objtable_.size();
  }
  const ObjRef objref = request->objref();
  if (objref >= num_objects) {
    RAY_LOG(RAY_FATAL, "internal error: no object with objref " << objref << " exists");
  }
  {
    std::lock_guard<std::mutex> pull_queue_guard(pull_queue_lock_);
    pull_queue_.push_back(std::make_pair(request->workerid(), objref));
  }
  schedule();
  return Status::OK;
}
// Handles the AliasObjRefs RPC: records that alias_objref points at
// target_objref (and the reverse edge), then runs a scheduling pass. Aliasing
// an objref with itself, using an unknown objref, or re-aliasing an objref
// that already has a target are all logged as fatal.
Status SchedulerService::AliasObjRefs(ServerContext* context, const AliasObjRefsRequest* request, AckReply* reply) {
  const ObjRef alias_objref = request->alias_objref();
  const ObjRef target_objref = request->target_objref();
  RAY_LOG(RAY_ALIAS, "Aliasing objref " << alias_objref << " with objref " << target_objref);
  if (alias_objref == target_objref) {
    RAY_LOG(RAY_FATAL, "internal error: attempting to alias objref " << alias_objref << " with itself.");
  }
  size_t num_objects;
  {
    std::lock_guard<std::mutex> objtable_guard(objtable_lock_);
    num_objects = objtable_.size();
  }
  if (alias_objref >= num_objects) {
    RAY_LOG(RAY_FATAL, "internal error: no object with objref " << alias_objref << " exists");
  }
  if (target_objref >= num_objects) {
    RAY_LOG(RAY_FATAL, "internal error: no object with objref " << target_objref << " exists");
  }
  {
    // Forward edge: alias -> target.
    std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
    if (target_objrefs_[alias_objref] != UNITIALIZED_ALIAS) {
      RAY_LOG(RAY_FATAL, "internal error: attempting to alias objref " << alias_objref << " with objref " << target_objref << ", but objref " << alias_objref << " has already been aliased with objref " << target_objrefs_[alias_objref]);
    }
    target_objrefs_[alias_objref] = target_objref;
  }
  {
    // Reverse edge: target -> alias.
    std::lock_guard<std::mutex> reverse_guard(reverse_target_objrefs_lock_);
    reverse_target_objrefs_[target_objref].push_back(alias_objref);
  }
  schedule();
  return Status::OK;
}
// Handles the RegisterObjStore RPC: appends a handle (address, gRPC channel
// and stub) for the object store to objstores_ and returns its index as the
// new objstoreid. objstores_lock_ is held for the whole call.
Status SchedulerService::RegisterObjStore(ServerContext* context, const RegisterObjStoreRequest* request, RegisterObjStoreReply* reply) {
  std::lock_guard<std::mutex> objstores_guard(objstores_lock_);
  // The id of the new store is its position in objstores_.
  const ObjStoreId objstoreid = objstores_.size();
  auto channel = grpc::CreateChannel(request->objstore_address(), grpc::InsecureChannelCredentials());
  objstores_.push_back(ObjStoreHandle());
  auto& handle = objstores_[objstoreid];
  handle.address = request->objstore_address();
  handle.channel = channel;
  handle.objstore_stub = ObjStore::NewStub(channel);
  reply->set_objstoreid(objstoreid);
  return Status::OK;
}
// Handles the RegisterWorker RPC: registers the worker through
// register_worker, returns the assigned workerid and objstoreid in reply and
// runs a scheduling pass.
Status SchedulerService::RegisterWorker(ServerContext* context, const RegisterWorkerRequest* request, RegisterWorkerReply* reply) {
  const std::pair<WorkerId, ObjStoreId> ids = register_worker(request->worker_address(), request->objstore_address());
  const WorkerId workerid = ids.first;
  const ObjStoreId objstoreid = ids.second;
  RAY_LOG(RAY_INFO, "registered worker with workerid " << workerid);
  reply->set_workerid(workerid);
  reply->set_objstoreid(objstoreid);
  schedule();
  return Status::OK;
}
// Handles the RegisterFunction RPC: records in the function table that the
// given worker can run the named function, then runs a scheduling pass.
Status SchedulerService::RegisterFunction(ServerContext* context, const RegisterFunctionRequest* request, AckReply* reply) {
  RAY_LOG(RAY_INFO, "register function " << request->fnname() << " from workerid " << request->workerid());
  register_function(
      request->fnname(),
      request->workerid(),
      request->num_return_vals());
  schedule();
  return Status::OK;
}
// Handles the ObjReady RPC: marks the objref as canonical, records the
// reporting object store as a location of the object and runs a scheduling
// pass.
Status SchedulerService::ObjReady(ServerContext* context, const ObjReadyRequest* request, AckReply* reply) {
  const ObjRef ready_objref = request->objref();
  RAY_LOG(RAY_DEBUG, "object " << ready_objref << " ready on store " << request->objstoreid());
  // The objref must be canonical before add_location is called, because
  // add_location checks is_canonical.
  add_canonical_objref(ready_objref);
  add_location(ready_objref, request->objstoreid());
  schedule();
  return Status::OK;
}
// Handles the WorkerReady RPC: puts the reporting worker back on the list of
// available workers and runs a scheduling pass.
Status SchedulerService::WorkerReady(ServerContext* context, const WorkerReadyRequest* request, AckReply* reply) {
  RAY_LOG(RAY_INFO, "worker " << request->workerid() << " reported back");
  {
    // Keep the lock scope tight so it is released before schedule() runs.
    std::lock_guard<std::mutex> avail_workers_guard(avail_workers_lock_);
    avail_workers_.push_back(request->workerid());
  }
  schedule();
  return Status::OK;
}
// Handles the IncrementRefCount RPC: increments the reference count of every
// objref listed in the request. An empty request is logged as fatal.
Status SchedulerService::IncrementRefCount(ServerContext* context, const IncrementRefCountRequest* request, AckReply* reply) {
  const int num_objrefs = request->objref_size();
  if (num_objrefs == 0) {
    RAY_LOG(RAY_FATAL, "Scheduler received IncrementRefCountRequest with 0 objrefs.");
  }
  // Copy the objrefs out of the request into a vector for increment_ref_count.
  std::vector<ObjRef> objrefs;
  for (int index = 0; index != num_objrefs; ++index) {
    objrefs.push_back(request->objref(index));
  }
  // increment_ref_count expects its caller to hold reference_counts_lock_.
  std::lock_guard<std::mutex> reference_counts_guard(reference_counts_lock_);
  increment_ref_count(objrefs);
  return Status::OK;
}
// Handles the DecrementRefCount RPC: decrements the reference count of every
// objref listed in the request. An empty request is logged as fatal.
Status SchedulerService::DecrementRefCount(ServerContext* context, const DecrementRefCountRequest* request, AckReply* reply) {
  const int num_objrefs = request->objref_size();
  if (num_objrefs == 0) {
    RAY_LOG(RAY_FATAL, "Scheduler received DecrementRefCountRequest with 0 objrefs.");
  }
  // Copy the objrefs out of the request into a vector for decrement_ref_count.
  std::vector<ObjRef> objrefs;
  for (int index = 0; index != num_objrefs; ++index) {
    objrefs.push_back(request->objref(index));
  }
  // decrement_ref_count expects its caller to hold reference_counts_lock_.
  std::lock_guard<std::mutex> reference_counts_guard(reference_counts_lock_);
  decrement_ref_count(objrefs);
  return Status::OK;
}
// Handles the AddContainedObjRefs RPC: records the objrefs contained inside
// the object identified by request->objref(). Adding contained objrefs to an
// object that already has some is logged as fatal.
Status SchedulerService::AddContainedObjRefs(ServerContext* context, const AddContainedObjRefsRequest* request, AckReply* reply) {
  const ObjRef objref = request->objref();
  // if (!is_canonical(objref)) {
  // TODO(rkn): Perhaps we don't need this check. It won't work because the objstore may not have called ObjReady yet.
  //   RAY_LOG(RAY_FATAL, "Attempting to add contained objrefs for non-canonical objref " << objref);
  // }
  std::lock_guard<std::mutex> contained_guard(contained_objrefs_lock_);
  auto& contained = contained_objrefs_[objref];
  if (contained.size() != 0) {
    RAY_LOG(RAY_FATAL, "Attempting to add contained objrefs for objref " << objref << ", but contained_objrefs_[objref].size() != 0.");
  }
  for (int index = 0; index < request->contained_objref_size(); ++index) {
    contained.push_back(request->contained_objref(index));
  }
  return Status::OK;
}
// Handles the SchedulerInfo RPC by letting get_info fill in the reply.
Status SchedulerService::SchedulerInfo(ServerContext* context, const SchedulerInfoRequest* request, SchedulerInfoReply* reply) {
  const SchedulerInfoRequest& info_request = *request;
  get_info(info_request, reply);
  return Status::OK;
}
// Asks object store `to` to start fetching the object behind objref from
// object store `from`, using the canonical objref in the request.
//
// TODO(rkn): This could execute multiple times with the same arguments before
// the delivery finishes, but we only want it to happen once. Currently, the
// redundancy is handled by the object store, which will only execute the
// delivery once. However, we may want to handle it in the scheduler in the
// future.
//
// Precondition: aliasing for objref is complete, that is,
// has_canonical_objref(objref) == true. Delivering from a store to itself is
// logged as fatal.
void SchedulerService::deliver_object(ObjRef objref, ObjStoreId from, ObjStoreId to) {
  if (from == to) {
    RAY_LOG(RAY_FATAL, "attempting to deliver objref " << objref << " from objstore " << from << " to itself.");
  }
  if (!has_canonical_objref(objref)) {
    RAY_LOG(RAY_FATAL, "attempting to deliver objref " << objref << ", but this objref does not yet have a canonical objref.");
  }
  ClientContext context;
  AckReply reply;
  StartDeliveryRequest request;
  request.set_objref(get_canonical_objref(objref));
  // objstores_lock_ stays held while the address is read and the RPC is issued.
  std::lock_guard<std::mutex> objstores_guard(objstores_lock_);
  request.set_objstore_address(objstores_[from].address);
  objstores_[to].objstore_stub->StartDelivery(&context, request, &reply);
}
// Runs one scheduling pass: first pending pulls, then task assignment with
// the configured algorithm, then pending alias notifications.
void SchedulerService::schedule() {
  // TODO(rkn): Do this more intelligently.
  perform_pulls(); // See what we can do in pull_queue_
  switch (scheduling_algorithm_) {
    case SCHEDULING_ALGORITHM_NAIVE:
      schedule_tasks_naively(); // See what we can do in task_queue_
      break;
    case SCHEDULING_ALGORITHM_LOCALITY_AWARE:
      schedule_tasks_location_aware(); // See what we can do in task_queue_
      break;
    default:
      RAY_LOG(RAY_FATAL, "scheduling algorithm not known");
      break;
  }
  perform_notify_aliases(); // See what we can do in alias_notification_queue_
}
// assign_task assumes that computation_graph_lock_ has been acquired.
// assign_task assumes that the canonical objrefs for its arguments are all ready, that is has_canonical_objref() is true for all of the call's arguments
//
// Sends the task stored under operationid to worker workerid through the
// worker's ExecuteTask RPC. Before that, for every argument that is passed by
// reference (has_obj() is false) it:
//   1. queues an alias notification for the worker's object store and also
//      attempts the notification immediately, and
//   2. starts a delivery to the worker's object store if objtable_ does not
//      list that store as holding the canonical object.
void SchedulerService::assign_task(OperationId operationid, WorkerId workerid) {
const Task& task = computation_graph_.get_task(operationid);
ClientContext context;
ExecuteTaskRequest request;
ExecuteTaskReply reply;
RAY_LOG(RAY_INFO, "starting to send arguments");
for (size_t i = 0; i < task.arg_size(); ++i) {
if (!task.arg(i).has_obj()) {
ObjRef objref = task.arg(i).ref();
ObjRef canonical_objref = get_canonical_objref(objref);
{
// Notify the relevant objstore about potential aliasing when it's ready
std::lock_guard<std::mutex> alias_notification_queue_lock(alias_notification_queue_lock_);
alias_notification_queue_.push_back(std::make_pair(get_store(workerid), std::make_pair(objref, canonical_objref)));
}
// Try the notification right away; the queued entry above is retried later by perform_notify_aliases.
attempt_notify_alias(get_store(workerid), objref, canonical_objref);
RAY_LOG(RAY_DEBUG, "task contains object ref " << canonical_objref);
// Both guards below live until the end of this if-block, so objtable_lock_ and
// workers_lock_ are held across the deliver_object call. They are taken only
// after attempt_notify_alias and get_store above, which lock the same mutexes themselves.
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
auto &objstores = objtable_[canonical_objref];
std::lock_guard<std::mutex> workers_lock(workers_lock_);
if (!std::binary_search(objstores.begin(), objstores.end(), workers_[workerid].objstoreid)) { // TODO(rkn): replace this with get_store
deliver_object(canonical_objref, pick_objstore(canonical_objref), workers_[workerid].objstoreid); // TODO(rkn): replace this with get_store
}
}
}
request.mutable_task()->CopyFrom(task); // TODO(rkn): Is ownership handled properly here?
// NOTE(review): workers_ is read here after the workers_lock_ guard above has gone out of scope,
// and the returned status is never inspected — confirm both are intended.
Status status = workers_[workerid].worker_stub->ExecuteTask(&context, request, &reply);
}
bool SchedulerService::can_run(const Task& task) {
std::lock_guard<std::mutex> lock(objtable_lock_);
for (int i = 0; i < task.arg_size(); ++i) {
if (!task.arg(i).has_obj()) {
ObjRef objref = task.arg(i).ref();
if (!has_canonical_objref(objref)) {
return false;
}
ObjRef canonical_objref = get_canonical_objref(objref);
if (canonical_objref >= objtable_.size() || objtable_[canonical_objref].size() == 0) {
return false;
}
}
}
return true;
}
std::pair<WorkerId, ObjStoreId> SchedulerService::register_worker(const std::string& worker_address, const std::string& objstore_address) {
RAY_LOG(RAY_INFO, "registering worker " << worker_address << " connected to object store " << objstore_address);
ObjStoreId objstoreid = std::numeric_limits<size_t>::max();
for (int num_attempts = 0; num_attempts < 5; ++num_attempts) {
std::lock_guard<std::mutex> lock(objstores_lock_);
for (size_t i = 0; i < objstores_.size(); ++i) {
if (objstores_[i].address == objstore_address) {
objstoreid = i;
}
}
if (objstoreid == std::numeric_limits<size_t>::max()) {
std::this_thread::sleep_for (std::chrono::milliseconds(100));
}
}
if (objstoreid == std::numeric_limits<size_t>::max()) {
RAY_LOG(RAY_FATAL, "object store with address " << objstore_address << " not yet registered");
}
workers_lock_.lock();
WorkerId workerid = workers_.size();
workers_.push_back(WorkerHandle());
auto channel = grpc::CreateChannel(worker_address, grpc::InsecureChannelCredentials());
workers_[workerid].channel = channel;
workers_[workerid].objstoreid = objstoreid;
workers_[workerid].worker_stub = WorkerService::NewStub(channel);
workers_lock_.unlock();
avail_workers_lock_.lock();
avail_workers_.push_back(workerid);
avail_workers_lock_.unlock();
return std::make_pair(workerid, objstoreid);
}
// Allocates a new objref by appending one entry to each of the five
// per-object tables (objtable_, target_objrefs_, reverse_target_objrefs_,
// reference_counts_, contained_objrefs_). The returned objref is the previous
// size of objtable_, i.e. the index of the new entries. All five table locks
// are held for the duration of the call; if the tables are found to have
// different sizes, a fatal error is logged.
ObjRef SchedulerService::register_new_object() {
// If we don't simultaneously lock objtable_ and target_objrefs_, we will probably get errors.
// TODO(rkn): increment/decrement_reference_count also acquire reference_counts_lock_ and target_objrefs_lock_ (through has_canonical_objref()), which caused deadlock in the past
// NOTE(review): the acquisition order below presumably has to stay consistent with other multi-lock sites such as get_info — confirm before reordering.
std::lock_guard<std::mutex> reference_counts_lock(reference_counts_lock_);
std::lock_guard<std::mutex> contained_objrefs_lock(contained_objrefs_lock_);
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
std::lock_guard<std::mutex> target_objrefs_lock(target_objrefs_lock_);
std::lock_guard<std::mutex> reverse_target_objrefs_lock(reverse_target_objrefs_lock_);
// Snapshot all table sizes; they must agree, because every objref indexes all five tables.
ObjRef objtable_size = objtable_.size();
ObjRef target_objrefs_size = target_objrefs_.size();
ObjRef reverse_target_objrefs_size = reverse_target_objrefs_.size();
ObjRef reference_counts_size = reference_counts_.size();
ObjRef contained_objrefs_size = contained_objrefs_.size();
if (objtable_size != target_objrefs_size) {
RAY_LOG(RAY_FATAL, "objtable_ and target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and target_objrefs_.size() = " << target_objrefs_size);
}
if (objtable_size != reverse_target_objrefs_size) {
RAY_LOG(RAY_FATAL, "objtable_ and reverse_target_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and reverse_target_objrefs_.size() = " << reverse_target_objrefs_size);
}
if (objtable_size != reference_counts_size) {
RAY_LOG(RAY_FATAL, "objtable_ and reference_counts_ should have the same size, but objtable_.size() = " << objtable_size << " and reference_counts_.size() = " << reference_counts_size);
}
if (objtable_size != contained_objrefs_size) {
RAY_LOG(RAY_FATAL, "objtable_ and contained_objrefs_ should have the same size, but objtable_.size() = " << objtable_size << " and contained_objrefs_.size() = " << contained_objrefs_size);
}
// New object: no locations, no alias target yet, no aliases pointing at it, reference count 0, no contained objrefs.
objtable_.push_back(std::vector<ObjStoreId>());
target_objrefs_.push_back(UNITIALIZED_ALIAS);
reverse_target_objrefs_.push_back(std::vector<ObjRef>());
reference_counts_.push_back(0);
contained_objrefs_.push_back(std::vector<ObjRef>());
return objtable_size;
}
// Records that object store objstoreid holds the object canonical_objref.
// The per-object location list in objtable_ is kept sorted and free of
// duplicates. Must be called with a canonical objref; a non-canonical or
// unregistered objref is logged as fatal.
void SchedulerService::add_location(ObjRef canonical_objref, ObjStoreId objstoreid) {
  if (!is_canonical(canonical_objref)) {
    RAY_LOG(RAY_FATAL, "Attempting to call add_location with a non-canonical objref (objref " << canonical_objref << ")");
  }
  std::lock_guard<std::mutex> objtable_guard(objtable_lock_);
  if (canonical_objref >= objtable_.size()) {
    RAY_LOG(RAY_FATAL, "trying to put an object in the object store that was not registered with the scheduler (objref " << canonical_objref << ")");
  }
  auto& locations = objtable_[canonical_objref];
  // Binary search for the insertion point; insert only if the store is not
  // already listed.
  auto insert_at = std::lower_bound(locations.begin(), locations.end(), objstoreid);
  const bool already_listed = (insert_at != locations.end()) && !(objstoreid < *insert_at);
  if (!already_listed) {
    locations.insert(insert_at, objstoreid);
  }
}
// Declares objref canonical by making it its own target in target_objrefs_.
// An unknown objref, or one that is already aliased to a different objref, is
// logged as fatal.
void SchedulerService::add_canonical_objref(ObjRef objref) {
  std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
  if (objref >= target_objrefs_.size()) {
    RAY_LOG(RAY_FATAL, "internal error: attempting to insert objref " << objref << " in target_objrefs_, but target_objrefs_.size() is " << target_objrefs_.size());
  }
  const bool is_unset = (target_objrefs_[objref] == UNITIALIZED_ALIAS);
  const bool is_self = (target_objrefs_[objref] == objref);
  if (!is_unset && !is_self) {
    RAY_LOG(RAY_FATAL, "internal error: attempting to declare objref " << objref << " as a canonical objref, but target_objrefs_[objref] is already aliased with objref " << target_objrefs_[objref]);
  }
  target_objrefs_[objref] = objref;
}
// Returns the id of the object store that worker workerid is attached to.
// Takes workers_lock_ for the lookup; workerid is not bounds-checked.
ObjStoreId SchedulerService::get_store(WorkerId workerid) {
  std::lock_guard<std::mutex> workers_guard(workers_lock_);
  return workers_[workerid].objstoreid;
}
void SchedulerService::register_function(const std::string& name, WorkerId workerid, size_t num_return_vals) {
std::lock_guard<std::mutex> lock(fntable_lock_);
FnInfo& info = fntable_[name];
info.set_num_return_vals(num_return_vals);
info.add_worker(workerid);
}
void SchedulerService::get_info(const SchedulerInfoRequest& request, SchedulerInfoReply* reply) {
// TODO(rkn): Also grab the objstores_lock_
// alias_notification_queue_lock_ may need to come before objtable_lock_
std::lock_guard<std::mutex> reference_counts_lock(reference_counts_lock_);
std::lock_guard<std::mutex> contained_objrefs_lock(contained_objrefs_lock_);
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
std::lock_guard<std::mutex> pull_queue_lock(pull_queue_lock_);
std::lock_guard<std::mutex> target_objrefs_lock(target_objrefs_lock_);
std::lock_guard<std::mutex> reverse_target_objrefs_lock(reverse_target_objrefs_lock_);
std::lock_guard<std::mutex> fntable_lock(fntable_lock_);
std::lock_guard<std::mutex> avail_workers_lock(avail_workers_lock_);
std::lock_guard<std::mutex> task_queue_lock(task_queue_lock_);
std::lock_guard<std::mutex> alias_notification_queue_lock(alias_notification_queue_lock_);
for (int i = 0; i < reference_counts_.size(); ++i) {
reply->add_reference_count(reference_counts_[i]);
}
for (int i = 0; i < target_objrefs_.size(); ++i) {
reply->add_target_objref(target_objrefs_[i]);
}
auto function_table = reply->mutable_function_table();
for (const auto& entry : fntable_) {
(*function_table)[entry.first].set_num_return_vals(entry.second.num_return_vals());
for (const WorkerId& worker : entry.second.workers()) {
(*function_table)[entry.first].add_workerid(worker);
}
}
for (const auto& entry : task_queue_) {
reply->add_operationid(entry);
}
for (const WorkerId& entry : avail_workers_) {
reply->add_avail_worker(entry);
}
}
// pick_objstore assumes that objtable_lock_ has been acquired
// pick_objstore must be called with a canonical_objref
//
// Returns a uniformly random object store among those that objtable_ lists as
// holding canonical_objref. A non-canonical objref, or an object that no store
// holds, is logged as fatal.
//
// Fixes relative to the previous version:
//  - The random engine used to be default-constructed on every call, so it
//    always started from the same seed and the "random" choice was the same
//    for a given number of locations. The engine is now seeded once per
//    thread from std::random_device and reused across calls.
//  - An empty location list used to turn size() - 1 into a wrapped-around
//    upper bound for the distribution; it is now reported explicitly.
ObjStoreId SchedulerService::pick_objstore(ObjRef canonical_objref) {
  if (!is_canonical(canonical_objref)) {
    RAY_LOG(RAY_FATAL, "Attempting to call pick_objstore with a non-canonical objref, (objref " << canonical_objref << ")");
  }
  const std::vector<ObjStoreId>& locations = objtable_[canonical_objref];
  if (locations.empty()) {
    RAY_LOG(RAY_FATAL, "Attempting to call pick_objstore for objref " << canonical_objref << ", but no object store holds this object.");
  }
  // thread_local avoids sharing engine state between gRPC handler threads.
  thread_local std::mt19937 rng((std::random_device())());
  std::uniform_int_distribution<size_t> uni(0, locations.size() - 1);
  return locations[uni(rng)];
}
// Returns true when objref is its own alias target. Calling this on an objref
// whose target is still UNITIALIZED_ALIAS is logged as fatal. objref is not
// bounds-checked against target_objrefs_.
bool SchedulerService::is_canonical(ObjRef objref) {
  std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
  const bool alias_unset = (target_objrefs_[objref] == UNITIALIZED_ALIAS);
  if (alias_unset) {
    RAY_LOG(RAY_FATAL, "Attempting to call is_canonical on an objref for which aliasing is not complete or the object is not ready, target_objrefs_[objref] == UNITIALIZED_ALIAS for objref " << objref << ".");
  }
  return target_objrefs_[objref] == objref;
}
// Walks pull_queue_ and completes every pull request whose object is already
// available on some object store. For such a request it starts a delivery to
// the requesting worker's object store (unless that store already holds the
// object), queues an alias notification for that store, and removes the
// request from the queue. Requests whose objref has no canonical objref yet,
// or whose object is not on any store, stay queued.
// pull_queue_lock_ is held for the whole function.
void SchedulerService::perform_pulls() {
std::lock_guard<std::mutex> pull_queue_lock(pull_queue_lock_);
// Complete all pull tasks that can be completed.
for (int i = 0; i < pull_queue_.size(); ++i) {
const std::pair<WorkerId, ObjRef>& pull = pull_queue_[i];
ObjRef objref = pull.second;
WorkerId workerid = pull.first;
if (!has_canonical_objref(objref)) {
RAY_LOG(RAY_ALIAS, "objref " << objref << " does not have a canonical_objref, so continuing");
continue;
}
ObjRef canonical_objref = get_canonical_objref(objref);
RAY_LOG(RAY_DEBUG, "attempting to pull objref " << pull.second << " with canonical objref " << canonical_objref << " to objstore " << get_store(workerid));
// objtable_lock_ is released again right after reading the count and re-acquired below for the search.
objtable_lock_.lock();
int num_stores = objtable_[canonical_objref].size();
objtable_lock_.unlock();
if (num_stores > 0) {
{
// objtable_lock_ is held across pick_objstore (which requires it) and deliver_object.
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
if (!std::binary_search(objtable_[canonical_objref].begin(), objtable_[canonical_objref].end(), get_store(workerid))) {
// The worker's local object store does not already contain objref, so ship
// it there from an object store that does have it.
ObjStoreId objstoreid = pick_objstore(canonical_objref);
deliver_object(canonical_objref, objstoreid, get_store(workerid));
}
}
{
// Notify the relevant objstore about potential aliasing when it's ready
std::lock_guard<std::mutex> alias_notification_queue_lock(alias_notification_queue_lock_);
alias_notification_queue_.push_back(std::make_pair(get_store(workerid), std::make_pair(objref, canonical_objref)));
}
// Remove the pull task from the queue
// Swap-with-last then pop; `pull` must not be used past this point, and i is
// stepped back so the element swapped into slot i is examined next.
std::swap(pull_queue_[i], pull_queue_[pull_queue_.size() - 1]);
pull_queue_.pop_back();
i -= 1;
}
}
}
// Naive scheduling: for each available worker, assigns the first queued task
// that the worker has registered a function for and whose arguments are ready
// (can_run). An assigned task is removed from task_queue_ and its worker from
// avail_workers_. The four locks below are held for the whole pass, including
// the calls to assign_task.
void SchedulerService::schedule_tasks_naively() {
std::lock_guard<std::mutex> computation_graph_lock(computation_graph_lock_);
std::lock_guard<std::mutex> fntable_lock(fntable_lock_);
std::lock_guard<std::mutex> avail_workers_lock(avail_workers_lock_);
std::lock_guard<std::mutex> task_queue_lock(task_queue_lock_);
for (int i = 0; i < avail_workers_.size(); ++i) {
// Submit all tasks whose arguments are ready.
WorkerId workerid = avail_workers_[i];
for (auto it = task_queue_.begin(); it != task_queue_.end(); ++it) {
// The use of erase(it) below invalidates the iterator, but we
// immediately break out of the inner loop, so the iterator is not used
// after the erase
const OperationId operationid = *it;
const Task& task = computation_graph_.get_task(operationid);
auto& workers = fntable_[task.name()].workers();
// binary_search is used on the worker list — presumably it is kept sorted by FnInfo::add_worker; verify.
if (std::binary_search(workers.begin(), workers.end(), workerid) && can_run(task)) {
assign_task(operationid, workerid);
task_queue_.erase(it);
// Remove the worker by swap-with-last and pop, then step i back so the
// worker swapped into slot i is considered on the next outer iteration.
std::swap(avail_workers_[i], avail_workers_[avail_workers_.size() - 1]);
avail_workers_.pop_back();
i -= 1;
break;
}
}
}
}
// Locality-aware scheduling: for each available worker, looks at every queued
// task that the worker can execute and whose arguments are ready, and assigns
// the one that needs the fewest argument objects shipped to the worker's
// object store. Ties go to the task that appears first in task_queue_ (the
// comparison below is strict). An assigned task is removed from task_queue_
// and its worker from avail_workers_.
void SchedulerService::schedule_tasks_location_aware() {
std::lock_guard<std::mutex> computation_graph_lock(computation_graph_lock_);
std::lock_guard<std::mutex> fntable_lock(fntable_lock_);
std::lock_guard<std::mutex> avail_workers_lock(avail_workers_lock_);
std::lock_guard<std::mutex> task_queue_lock(task_queue_lock_);
for (int i = 0; i < avail_workers_.size(); ++i) {
// Submit all tasks whose arguments are ready.
WorkerId workerid = avail_workers_[i];
// NOTE(review): workers_ is read here without taking workers_lock_ — confirm this is safe.
ObjStoreId objstoreid = workers_[workerid].objstoreid;
auto bestit = task_queue_.end(); // keep track of the task that fits the worker best so far
size_t min_num_shipped_objects = std::numeric_limits<size_t>::max(); // number of objects that need to be transfered for this worker
for (auto it = task_queue_.begin(); it != task_queue_.end(); ++it) {
OperationId operationid = *it;
const Task& task = computation_graph_.get_task(operationid);
auto& workers = fntable_[task.name()].workers();
if (std::binary_search(workers.begin(), workers.end(), workerid) && can_run(task)) {
// determine how many objects would need to be shipped
size_t num_shipped_objects = 0;
for (int j = 0; j < task.arg_size(); ++j) {
if (!task.arg(j).has_obj()) {
ObjRef objref = task.arg(j).ref();
if (!has_canonical_objref(objref)) {
RAY_LOG(RAY_FATAL, "no canonical object ref found even though task is ready; that should not be possible!");
}
ObjRef canonical_objref = get_canonical_objref(objref);
// check if the object is already in the local object store
// NOTE(review): objtable_ is read here without objtable_lock_ (can_run above takes and releases it) — confirm this is safe.
if (!std::binary_search(objtable_[canonical_objref].begin(), objtable_[canonical_objref].end(), objstoreid)) {
num_shipped_objects += 1;
}
}
}
if (num_shipped_objects < min_num_shipped_objects) {
min_num_shipped_objects = num_shipped_objects;
bestit = it;
}
}
}
// if we found a suitable task
if (bestit != task_queue_.end()) {
assign_task(*bestit, workerid);
task_queue_.erase(bestit);
// Remove the worker by swap-with-last and pop, then step i back so the
// worker swapped into slot i is considered on the next iteration.
std::swap(avail_workers_[i], avail_workers_[avail_workers_.size() - 1]);
avail_workers_.pop_back();
i -= 1;
}
}
}
void SchedulerService::perform_notify_aliases() {
std::lock_guard<std::mutex> alias_notification_queue_lock(alias_notification_queue_lock_);
for (int i = 0; i < alias_notification_queue_.size(); ++i) {
const std::pair<WorkerId, std::pair<ObjRef, ObjRef> > alias_notification = alias_notification_queue_[i];
ObjStoreId objstoreid = alias_notification.first;
ObjRef alias_objref = alias_notification.second.first;
ObjRef canonical_objref = alias_notification.second.second;
if (attempt_notify_alias(objstoreid, alias_objref, canonical_objref)) { // this locks both the objstore_ and objtable_
// the attempt to notify the objstore of the objref aliasing succeeded, so remove the notification task from the queue
std::swap(alias_notification_queue_[i], alias_notification_queue_[alias_notification_queue_.size() - 1]);
alias_notification_queue_.pop_back();
i -= 1;
}
}
}
// Follows the alias chain starting at objref. Returns true if the chain ends
// at an objref that is its own target, and false if it reaches an objref whose
// target is still UNITIALIZED_ALIAS. An out-of-range objref on the chain is
// logged as fatal.
bool SchedulerService::has_canonical_objref(ObjRef objref) {
  std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
  for (ObjRef current = objref; ; current = target_objrefs_[current]) {
    if (current >= target_objrefs_.size()) {
      RAY_LOG(RAY_FATAL, "Attempting to index target_objrefs_ with objref " << current << ", but target_objrefs_.size() = " << target_objrefs_.size());
    }
    const ObjRef target = target_objrefs_[current];
    if (target == UNITIALIZED_ALIAS) {
      return false;
    }
    if (target == current) {
      return true;
    }
  }
}
// Follows the alias chain starting at objref and returns the objref at its
// end (the one that is its own target). Assumes has_canonical_objref(objref)
// is true; hitting an out-of-range objref or an unset alias on the way is
// logged as fatal.
ObjRef SchedulerService::get_canonical_objref(ObjRef objref) {
  std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
  ObjRef current = objref;
  for (;;) {
    if (current >= target_objrefs_.size()) {
      RAY_LOG(RAY_FATAL, "Attempting to index target_objrefs_ with objref " << current << ", but target_objrefs_.size() = " << target_objrefs_.size());
    }
    const ObjRef target = target_objrefs_[current];
    if (target == UNITIALIZED_ALIAS) {
      RAY_LOG(RAY_FATAL, "Attempting to get canonical objref for objref " << objref << ", which aliases, objref " << current << ", but target_objrefs_[objref_temp] == UNITIALIZED_ALIAS for objref_temp = " << current << ".");
    }
    if (target == current) {
      return current;
    }
    current = target;
    RAY_LOG(RAY_ALIAS, "Looping in get_canonical_objref.");
  }
}
// Tells object store objstoreid that alias_objref refers to the object stored
// under canonical_objref, provided that store already holds the object.
// Returns true if nothing had to be done (the two objrefs are equal) or the
// NotifyAlias RPC was issued, and false if it is too early because the store
// does not have the object yet.
bool SchedulerService::attempt_notify_alias(ObjStoreId objstoreid, ObjRef alias_objref, ObjRef canonical_objref) {
  if (alias_objref == canonical_objref) {
    // An objref is trivially an alias of itself; no notification needed.
    return true;
  }
  bool store_has_object;
  {
    std::lock_guard<std::mutex> objtable_guard(objtable_lock_);
    store_has_object = std::binary_search(objtable_[canonical_objref].begin(), objtable_[canonical_objref].end(), objstoreid);
  }
  if (!store_has_object) {
    // The objstore doesn't have the object for canonical_objref yet, so it is
    // too early to notify it about the alias.
    return false;
  }
  ClientContext context;
  AckReply reply;
  NotifyAliasRequest request;
  request.set_alias_objref(alias_objref);
  request.set_canonical_objref(canonical_objref);
  {
    std::lock_guard<std::mutex> objstores_guard(objstores_lock_);
    objstores_[objstoreid].objstore_stub->NotifyAlias(&context, request, &reply);
  }
  return true;
}
// Sends a DeallocateObject RPC for canonical_objref to every object store that
// objtable_ lists as holding it, clears that location list, and then
// decrements the reference counts of the objrefs contained in the object.
void SchedulerService::deallocate_object(ObjRef canonical_objref) {
// deallocate_object should only be called from decrement_ref_count (note that
// deallocate_object also recursively calls decrement_ref_count). Both of
// these methods require reference_counts_lock_ to have been acquired, and
// so the lock must be acquired outside of these methods (it is acquired in
// DecrementRefCount).
RAY_LOG(RAY_REFCOUNT, "Deallocating canonical_objref " << canonical_objref << ".");
{
// objtable_lock_ and objstores_lock_ are both held while the RPCs below are issued.
std::lock_guard<std::mutex> objtable_lock(objtable_lock_);
auto &objstores = objtable_[canonical_objref];
std::lock_guard<std::mutex> objstores_lock(objstores_lock_); // TODO(rkn): Should this be inside the for loop instead?
for (int i = 0; i < objstores.size(); ++i) {
ClientContext context;
AckReply reply;
DeallocateObjectRequest request;
request.set_canonical_objref(canonical_objref);
ObjStoreId objstoreid = objstores[i];
RAY_LOG(RAY_REFCOUNT, "Attempting to deallocate canonical_objref " << canonical_objref << " from objstore " << objstoreid);
objstores_[objstoreid].objstore_stub->DeallocateObject(&context, request, &reply);
}
// The object is gone from every store, so forget all of its locations.
objtable_[canonical_objref].clear();
}
// NOTE(review): contained_objrefs_ is read here without this function taking contained_objrefs_lock_ — confirm whether callers are expected to hold it.
decrement_ref_count(contained_objrefs_[canonical_objref]);
}
void SchedulerService::increment_ref_count(std::vector<ObjRef> &objrefs) {
// increment_ref_count assumes that reference_counts_lock_ has been acquired already
for (int i = 0; i < objrefs.size(); ++i) {
ObjRef objref = objrefs[i];
if (reference_counts_[objref] == DEALLOCATED) {
RAY_LOG(RAY_FATAL, "Attempting to increment the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
}
reference_counts_[objref] += 1;
RAY_LOG(RAY_REFCOUNT, "Incremented ref count for objref " << objref <<". New reference count is " << reference_counts_[objref]);
}
}
// Subtracts one from the reference count of every objref in objrefs. After
// each decrement it collects all objrefs equivalent to the one just
// decremented (its alias group, via get_equivalent_objrefs); if every count in
// that group is 0, the underlying object is deallocated and every count in the
// group is set to DEALLOCATED. Decrementing an objref whose count is already 0
// or DEALLOCATED is logged as fatal.
// Note that deallocate_object calls back into this function for the objrefs
// contained in the deallocated object.
void SchedulerService::decrement_ref_count(std::vector<ObjRef> &objrefs) {
// decrement_ref_count assumes that reference_counts_lock_ has been acquired already
for (int i = 0; i < objrefs.size(); ++i) {
ObjRef objref = objrefs[i];
if (reference_counts_[objref] == DEALLOCATED) {
RAY_LOG(RAY_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but this object appears to have been deallocated already.");
}
if (reference_counts_[objref] == 0) {
RAY_LOG(RAY_FATAL, "Attempting to decrement the reference count for objref " << objref << ", but the reference count for this object is already 0.");
}
reference_counts_[objref] -= 1;
RAY_LOG(RAY_REFCOUNT, "Decremented ref count for objref " << objref << ". New reference count is " << reference_counts_[objref]);
// See if we can deallocate the object
std::vector<ObjRef> equivalent_objrefs;
get_equivalent_objrefs(objref, equivalent_objrefs);
bool can_deallocate = true;
for (int j = 0; j < equivalent_objrefs.size(); ++j) {
if (reference_counts_[equivalent_objrefs[j]] != 0) {
can_deallocate = false;
break;
}
}
if (can_deallocate) {
// get_equivalent_objrefs puts the end of the alias chain first, so element 0 is expected to be the canonical objref.
ObjRef canonical_objref = equivalent_objrefs[0];
if (!is_canonical(canonical_objref)) {
RAY_LOG(RAY_FATAL, "canonical_objref is not canonical.");
}
deallocate_object(canonical_objref);
// Mark the whole alias group as deallocated only after the object itself has been released.
for (int j = 0; j < equivalent_objrefs.size(); ++j) {
reference_counts_[equivalent_objrefs[j]] = DEALLOCATED;
}
}
}
}
// Appends objref to objrefs, followed (depth-first) by every objref that
// aliases it directly or transitively according to reverse_target_objrefs_.
// The caller must already hold reverse_target_objrefs_lock_.
void SchedulerService::upstream_objrefs(ObjRef objref, std::vector<ObjRef> &objrefs) {
  objrefs.push_back(objref);
  const auto& direct_aliases = reverse_target_objrefs_[objref];
  for (const ObjRef alias : direct_aliases) {
    upstream_objrefs(alias, objrefs);
  }
}
// Collects into equivalent_objrefs every objref in the same alias group as
// objref. It first follows the alias chain from objref to its end (an objref
// that is its own target or whose target is unset), then gathers that end
// point and everything that aliases it. The end point is the first element
// appended.
void SchedulerService::get_equivalent_objrefs(ObjRef objref, std::vector<ObjRef> &equivalent_objrefs) {
  std::lock_guard<std::mutex> targets_guard(target_objrefs_lock_);
  ObjRef downstream_objref = objref;
  for (;;) {
    const ObjRef target = target_objrefs_[downstream_objref];
    if (target == downstream_objref || target == UNITIALIZED_ALIAS) {
      break;
    }
    RAY_LOG(RAY_ALIAS, "Looping in get_equivalent_objrefs");
    downstream_objref = target;
  }
  // upstream_objrefs expects reverse_target_objrefs_lock_ to be held.
  std::lock_guard<std::mutex> reverse_guard(reverse_target_objrefs_lock_);
  upstream_objrefs(downstream_objref, equivalent_objrefs);
}
// Starts the scheduler gRPC server and blocks until the server stops.
// service_addr is the scheduler address; only the part from the position
// returned by split_ip_address to the end is used, as the port to listen on
// (on 0.0.0.0, with insecure credentials).
void start_scheduler_service(const char* service_addr, SchedulingAlgorithmType scheduling_algorithm) {
  std::string service_address(service_addr);
  const std::string::iterator split_point = split_ip_address(service_address);
  const std::string port(split_point, service_address.end());
  const std::string listen_address = std::string("0.0.0.0:") + port;
  SchedulerService service(scheduling_algorithm);
  ServerBuilder builder;
  builder.AddListeningPort(listen_address, grpc::InsecureServerCredentials());
  builder.RegisterService(&service);
  std::unique_ptr<Server> server(builder.BuildAndStart());
  server->Wait();
}
// Looks for option in the argument range [begin, end) and returns the argument
// that immediately follows its first occurrence. Returns a null pointer when
// the option is absent or is the last argument.
char* get_cmd_option(char** begin, char** end, const std::string& option) {
  for (char** current = begin; current != end; ++current) {
    if (option == *current) {
      char** value = current + 1;
      return (value != end) ? *value : nullptr;
    }
  }
  return nullptr;
}
// Entry point of the scheduler process.
// Usage: <scheduler address> [--scheduler-algorithm naive|locality_aware]
// The algorithm defaults to locality aware; an unrecognized algorithm name
// leaves the default in place.
int main(int argc, char** argv) {
  if (argc < 2) {
    RAY_LOG(RAY_FATAL, "scheduler: expected at least one argument (scheduler ip address)");
    return 1;
  }
  SchedulingAlgorithmType scheduling_algorithm = SCHEDULING_ALGORITHM_LOCALITY_AWARE;
  if (argc > 2) {
    const char* requested = get_cmd_option(argv, argv + argc, "--scheduler-algorithm");
    if (requested) {
      const std::string algorithm_name(requested);
      if (algorithm_name == "naive") {
        std::cout << "using 'naive' scheduler" << std::endl;
        scheduling_algorithm = SCHEDULING_ALGORITHM_NAIVE;
      } else if (algorithm_name == "locality_aware") {
        std::cout << "using 'locality aware' scheduler" << std::endl;
        scheduling_algorithm = SCHEDULING_ALGORITHM_LOCALITY_AWARE;
      }
    }
  }
  start_scheduler_service(argv[1], scheduling_algorithm);
  return 0;
}