mirror of
https://github.com/wassname/ray.git
synced 2026-06-30 06:30:33 +08:00
d95c8b8a41
* Add index for tasks to dispatch * Task dependency manager interface * Unsubscribe dependencies and tests * NodeManager * Revert "Add index for tasks to dispatch" This reverts commit c6ccb9aa306e00f80d34b991055e4e83872595ea. * tmp * Move back to waiting if args not ready * update
261 lines
12 KiB
C++
261 lines
12 KiB
C++
// Copyright 2017 The Ray Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#pragma once
|
|
|
|
// clang-format off
|
|
#include "ray/common/id.h"
|
|
#include "ray/common/task/task.h"
|
|
#include "ray/object_manager/object_manager.h"
|
|
#include "ray/raylet/reconstruction_policy.h"
|
|
// clang-format on
|
|
|
|
namespace ray {
|
|
|
|
namespace raylet {
|
|
|
|
// NOTE(review): TaskLeaseData is not referenced by any declaration visible in
// this header, yet this using-declaration makes the name available in
// ray::raylet for every file that includes it. Presumably other translation
// units rely on it -- confirm before removing.
using rpc::TaskLeaseData;
|
|
|
|
// Forward declaration of the reconstruction policy class.
// NOTE(review): the TaskDependencyManager declared below stores a
// ReconstructionPolicyInterface reference rather than this type -- confirm
// whether this forward declaration is still needed.
class ReconstructionPolicy;
|
|
|
|
/// Used for unit-testing the ClusterTaskManager, which calls these methods for
|
|
/// locally queued tasks that have dependencies.
|
|
class TaskDependencyManagerInterface {
|
|
public:
|
|
virtual bool SubscribeGetDependencies(
|
|
const TaskID &task_id,
|
|
const std::vector<rpc::ObjectReference> &required_objects) = 0;
|
|
virtual bool IsTaskReady(const TaskID &task_id) const = 0;
|
|
virtual bool UnsubscribeGetDependencies(const TaskID &task_id) = 0;
|
|
virtual ~TaskDependencyManagerInterface() {}
|
|
};
|
|
|
|
/// \class TaskDependencyManager
|
|
///
|
|
/// Responsible for managing object dependencies for tasks. The caller can
|
|
/// subscribe to object dependencies for a task. The task manager will
|
|
/// determine which object dependencies are remote. These are the objects that
|
|
/// are neither in the local object store, nor will they be created by a
|
|
/// locally queued task. The task manager will request that these objects be
|
|
/// made available locally, either by object transfer from a remote node or
|
|
/// reconstruction. The task manager will also cancel these objects if they are
|
|
/// no longer needed by any task.
|
|
class TaskDependencyManager : public TaskDependencyManagerInterface {
|
|
public:
|
|
/// Create a task dependency manager.
|
|
TaskDependencyManager(ObjectManagerInterface &object_manager,
|
|
ReconstructionPolicyInterface &reconstruction_policy);
|
|
|
|
/// Check whether an object is locally available.
|
|
///
|
|
/// \param object_id The object to check for.
|
|
/// \return Whether the object is local.
|
|
bool CheckObjectLocal(const ObjectID &object_id) const;
|
|
|
|
/// Subscribe to object depedencies required by the task and check whether
|
|
/// all dependencies are fulfilled. This should be called for task arguments and
|
|
/// `ray.get` calls during task execution.
|
|
///
|
|
/// The TaskDependencyManager will track the task's dependencies
|
|
/// until UnsubscribeGetDependencies is called on the same task ID. If any
|
|
/// dependencies are remote, then they will be requested. When the last
|
|
/// remote dependency later appears locally via a call to HandleObjectLocal,
|
|
/// the subscribed task will be returned by the HandleObjectLocal call,
|
|
/// signifying that it is ready to run. This method may be called multiple
|
|
/// times per task.
|
|
///
|
|
/// \param task_id The ID of the task whose dependencies to subscribe to.
|
|
/// \param required_objects The objects required by the task.
|
|
/// \return Whether all of the given dependencies for the given task are
|
|
/// local.
|
|
bool SubscribeGetDependencies(
|
|
const TaskID &task_id, const std::vector<rpc::ObjectReference> &required_objects);
|
|
|
|
/// Check whether a task is ready to run. The task ID must
|
|
/// have been previously subscribed by the caller.
|
|
///
|
|
/// \param task_id The ID of the task to check.
|
|
/// \return Whether all of the dependencies for the task are
|
|
/// local.
|
|
bool IsTaskReady(const TaskID &task_id) const;
|
|
|
|
/// Subscribe to object depedencies required by the worker. This should be called for
|
|
/// ray.wait calls during task execution.
|
|
///
|
|
/// The TaskDependencyManager will track all remote dependencies until the
|
|
/// dependencies are local, or until UnsubscribeWaitDependencies is called
|
|
/// with the same worker ID, whichever occurs first. Remote dependencies will
|
|
/// be requested. This method may be called multiple times per worker on the
|
|
/// same objects.
|
|
///
|
|
/// \param worker_id The ID of the worker that called `ray.wait`.
|
|
/// \param required_objects The objects required by the worker.
|
|
/// \return Void.
|
|
void SubscribeWaitDependencies(
|
|
const WorkerID &worker_id,
|
|
const std::vector<rpc::ObjectReference> &required_objects);
|
|
|
|
/// Unsubscribe from the object dependencies required by this task through the task
|
|
/// arguments or `ray.get`. If the objects were remote and are no longer required by any
|
|
/// subscribed task, then they will be canceled.
|
|
///
|
|
/// \param task_id The ID of the task whose dependencies we should unsubscribe from.
|
|
/// \return Whether the task was subscribed before.
|
|
bool UnsubscribeGetDependencies(const TaskID &task_id);
|
|
|
|
/// Unsubscribe from the object dependencies required by this worker through `ray.wait`.
|
|
/// If the objects were remote and are no longer required by any subscribed task, then
|
|
/// they will be canceled.
|
|
///
|
|
/// \param worker_id The ID of the worker whose dependencies we should unsubscribe from.
|
|
/// \return The objects that the worker was waiting on.
|
|
void UnsubscribeWaitDependencies(const WorkerID &worker_id);
|
|
|
|
/// Mark that the given task is pending execution. Any objects that it creates
|
|
/// are now considered to be pending creation. If there are any subscribed
|
|
/// tasks that depend on these objects, then the objects will be canceled.
|
|
///
|
|
/// \param task The task that is pending execution.
|
|
void TaskPending(const Task &task);
|
|
|
|
/// Mark that the given task is no longer pending execution. Any objects that
|
|
/// it creates that are not already local are now considered to be remote. If
|
|
/// there are any subscribed tasks that depend on these objects, then the
|
|
/// objects will be requested.
|
|
///
|
|
/// \param task_id The ID of the task to cancel.
|
|
void TaskCanceled(const TaskID &task_id);
|
|
|
|
/// Handle an object becoming locally available. If there are any subscribed
|
|
/// tasks that depend on this object, then the object will be canceled.
|
|
///
|
|
/// \param object_id The object ID of the object to mark as locally
|
|
/// available.
|
|
/// \return A list of task IDs. This contains all subscribed tasks that now
|
|
/// have all of their dependencies fulfilled, once this object was made
|
|
/// local.
|
|
std::vector<TaskID> HandleObjectLocal(const ray::ObjectID &object_id);
|
|
|
|
/// Handle an object that is no longer locally available. If there are any
|
|
/// subscribed tasks that depend on this object, then the object will be
|
|
/// requested.
|
|
///
|
|
/// \param object_id The object ID of the object that was previously locally
|
|
/// available.
|
|
/// \return A list of task IDs. This contains all subscribed tasks that
|
|
/// previously had all of their dependencies fulfilled, but are now missing
|
|
/// this object dependency.
|
|
std::vector<TaskID> HandleObjectMissing(const ray::ObjectID &object_id);
|
|
|
|
/// Remove all of the tasks specified. These tasks will no longer be
|
|
/// considered pending and the objects they depend on will no longer be
|
|
/// required.
|
|
///
|
|
/// \param task_ids The collection of task IDs. For a given task in this set,
|
|
/// all tasks that depend on the task must also be included in the set.
|
|
void RemoveTasksAndRelatedObjects(const std::unordered_set<TaskID> &task_ids);
|
|
|
|
/// Returns debug string for class.
|
|
///
|
|
/// \return string.
|
|
std::string DebugString() const;
|
|
|
|
/// Get the address of the owner of this object. An address will only be
|
|
/// returned if the caller previously specified that this object is required
|
|
/// on this node, through a call to SubscribeGetDependencies or
|
|
/// SubscribeWaitDependencies.
|
|
///
|
|
/// \param[in] object_id The object whose owner to get.
|
|
/// \param[out] owner_address The address of the object's owner, if
|
|
/// available.
|
|
/// \return True if we have owner information for the object.
|
|
bool GetOwnerAddress(const ObjectID &object_id, rpc::Address *owner_address) const;
|
|
|
|
private:
|
|
struct ObjectDependencies {
|
|
ObjectDependencies(const rpc::ObjectReference &ref)
|
|
: owner_address(ref.owner_address()) {}
|
|
/// The tasks that depend on this object, either because the object is a task argument
|
|
/// or because the task called `ray.get` on the object.
|
|
std::unordered_set<TaskID> dependent_tasks;
|
|
/// The workers that depend on this object because they called `ray.wait` on the
|
|
/// object.
|
|
std::unordered_set<WorkerID> dependent_workers;
|
|
/// The address of the worker that owns this object.
|
|
rpc::Address owner_address;
|
|
|
|
bool Empty() const { return dependent_tasks.empty() && dependent_workers.empty(); }
|
|
};
|
|
|
|
/// A struct to represent the object dependencies of a task.
|
|
struct TaskDependencies {
|
|
/// The objects that the task depends on. These are either the arguments to
|
|
/// the task or objects that the task calls `ray.get` on. These must be
|
|
/// local before the task is ready to execute. Objects are removed from
|
|
/// this set once UnsubscribeGetDependencies is called.
|
|
std::unordered_set<ObjectID> get_dependencies;
|
|
/// The number of object arguments that are not available locally. This
|
|
/// must be zero before the task is ready to execute.
|
|
int64_t num_missing_get_dependencies;
|
|
};
|
|
|
|
/// The objects that the worker is fetching. These are objects that a task that executed
|
|
/// or is executing on the worker called `ray.wait` on that are not yet local. An object
|
|
/// will be automatically removed from this set once it becomes local.
|
|
using WorkerDependencies = std::unordered_set<ObjectID>;
|
|
|
|
/// Check whether the given object needs to be made available through object
|
|
/// transfer or reconstruction. These are objects for which: (1) there is a
|
|
/// subscribed task dependent on it, (2) the object is not local, and (3) the
|
|
/// task that creates the object is not pending execution locally.
|
|
bool CheckObjectRequired(const ObjectID &object_id, rpc::Address *owner_address) const;
|
|
/// If the given object is required, then request that the object be made
|
|
/// available through object transfer or reconstruction.
|
|
void HandleRemoteDependencyRequired(const ObjectID &object_id);
|
|
/// If the given object is no longer required, then cancel any in-progress
|
|
/// operations to make the object available through object transfer or
|
|
/// reconstruction.
|
|
void HandleRemoteDependencyCanceled(const ObjectID &object_id);
|
|
|
|
/// The object manager, used to fetch required objects from remote nodes.
|
|
ObjectManagerInterface &object_manager_;
|
|
/// The reconstruction policy, used to reconstruct required objects that no
|
|
/// longer exist on any live nodes.
|
|
ReconstructionPolicyInterface &reconstruction_policy_;
|
|
/// A mapping from task ID of each subscribed task to its list of object
|
|
/// dependencies, either task arguments or objects passed into `ray.get`.
|
|
std::unordered_map<ray::TaskID, TaskDependencies> task_dependencies_;
|
|
/// A mapping from worker ID to each object that the worker called `ray.wait` on.
|
|
std::unordered_map<ray::WorkerID, WorkerDependencies> worker_dependencies_;
|
|
/// All tasks whose outputs are required by a subscribed task. This is a
|
|
/// mapping from task ID to information about the objects that the task
|
|
/// creates, either by return value or by `ray.put`. For each object, we
|
|
/// store the IDs of the subscribed tasks that are dependent on the object.
|
|
std::unordered_map<ray::TaskID, std::unordered_map<ObjectID, ObjectDependencies>>
|
|
required_tasks_;
|
|
/// Objects that are required by a subscribed task, are not local, and are
|
|
/// not created by a pending task. For these objects, there are pending
|
|
/// operations to make the object available.
|
|
std::unordered_set<ray::ObjectID> required_objects_;
|
|
/// The set of locally available objects.
|
|
std::unordered_set<ray::ObjectID> local_objects_;
|
|
/// The set of tasks that are pending execution. Any objects created by these
|
|
/// tasks that are not already local are pending creation.
|
|
std::unordered_set<ray::TaskID> pending_tasks_;
|
|
};
|
|
|
|
} // namespace raylet
|
|
|
|
} // namespace ray
|