[core worker] Submit Python actor tasks through core worker (#5750)

* Submit actor tasks through core worker

* Fix java

* add comment

* Remove task builder

* Check negative

* Increase -> Increment

* pass by reference

* fix signal

* Clean up c++ actor handle

* more cleanup

* Clean up headers

* Fix unique_ptr construction

* Fix java

* Move profiling to c++

* dedup

* fix error

* comments

* fix java

* Fix tests

* wait for actor to exit

* Start after constructor

* ignore java build

* fix comment

* always init logging

* Fix logging

* fix logging issue

* shared_ptr for profiler

* DEBUG -> WARNING

* fix killed_ init

* Fix flaky checkpointing tests

* -v flag for tune tests

* Fix checkpoint test logic

* Fix exception matching

* timeout exception

* Fix test exception info

* Fix import

* fix build

* Fix test

* shared_ptr
This commit is contained in:
Edward Oakes
2019-10-07 15:42:19 -07:00
committed by GitHub
parent 04e997fe0d
commit 08e4e3a153
24 changed files with 659 additions and 888 deletions
-159
View File
@@ -41,7 +41,6 @@ import ray.signature
import ray.state
from ray import (
ActorHandleID,
ActorID,
WorkerID,
JobID,
@@ -569,164 +568,6 @@ class Worker(object):
assert len(results) == len(object_ids)
return results
def submit_task(self,
                function_descriptor,
                args,
                actor_id=None,
                actor_handle_id=None,
                actor_counter=0,
                actor_creation_id=None,
                actor_creation_dummy_object_id=None,
                previous_actor_task_dummy_object_id=None,
                max_actor_reconstructions=0,
                new_actor_handles=None,
                num_return_vals=None,
                resources=None,
                placement_resources=None,
                job_id=None):
    """Submit a remote task to the scheduler.

    Tell the scheduler to schedule the execution of the function with
    function_descriptor with arguments args. Retrieve object IDs for the
    outputs of the function from the scheduler and immediately return them.

    Normal (non-actor) tasks are handed to the core worker; actor creation
    tasks and actor tasks are packaged into a task spec and submitted
    directly to the raylet.

    Args:
        function_descriptor: The function descriptor to execute.
        args: The arguments to pass into the function. Arguments can be
            object IDs or they can be values. If they are values, they must
            be serializable objects.
        actor_id: The ID of the actor that this task is for.
        actor_handle_id: The ID of the actor handle used to submit this
            task. Must be provided if and only if actor_id is provided.
        actor_counter: The counter of the actor task.
        actor_creation_id: The ID of the actor to create, if this is an
            actor creation task.
        actor_creation_dummy_object_id: If this task is an actor method,
            then this argument is the dummy object ID associated with the
            actor creation task for the corresponding actor.
        previous_actor_task_dummy_object_id: If this task is an actor,
            then this argument is the dummy object ID associated with the
            task previously submitted to the corresponding actor.
        max_actor_reconstructions: Forwarded unchanged to the task spec;
            presumably the maximum number of times the actor may be
            reconstructed (TODO confirm against the task spec).
        new_actor_handles: Forwarded to the task spec; defaults to an
            empty list when not provided.
        num_return_vals: The number of return values this function should
            have.
        resources: The resource requirements for this task. Required.
            Entries with a zero quantity are dropped before submission.
        placement_resources: The resources required for placing the task.
            If this is not provided or if it is an empty dictionary, then
            the placement resources will be equal to resources.
        job_id: The ID of the relevant job. This is almost always the
            job ID of the job that is currently running. However, in
            the exceptional case that an actor task is being dispatched to
            an actor created by a different job, this should be the
            job ID of the job that created the actor.

    Returns:
        The return object IDs for this task.

    Raises:
        ValueError: If resources is None, if any resource quantity is
            negative, or if a float quantity of at least 1 is not a whole
            number.
    """
    with profiling.profile("submit_task"):
        if actor_id is None:
            assert actor_handle_id is None
            actor_id = ActorID.nil()
            actor_handle_id = ActorHandleID.nil()
        else:
            assert actor_handle_id is not None

        if actor_creation_id is None:
            actor_creation_id = ActorID.nil()

        if actor_creation_dummy_object_id is None:
            actor_creation_dummy_object_id = ObjectID.nil()

        # Validate the resource request up front so that an invalid call
        # fails before any argument is written to the object store.
        if resources is None:
            raise ValueError("The resources dictionary is required.")
        for value in resources.values():
            assert isinstance(value, (int, float))
            if value < 0:
                raise ValueError(
                    "Resource quantities must be nonnegative.")
            if (value >= 1 and isinstance(value, float)
                    and not value.is_integer()):
                raise ValueError(
                    "Resource quantities must all be whole numbers.")

        # Remove any resources with zero quantity requirements
        resources = {
            resource_label: resource_quantity
            for resource_label, resource_quantity in resources.items()
            if resource_quantity > 0
        }

        if placement_resources is None:
            placement_resources = {}

        if new_actor_handles is None:
            new_actor_handles = []

        if job_id is None:
            job_id = self.current_job_id

        # Put large or complex arguments that are passed by value in the
        # object store first.
        args_for_raylet = []
        for arg in args:
            if isinstance(arg, ObjectID):
                args_for_raylet.append(arg)
            elif ray._raylet.check_simple_value(arg):
                args_for_raylet.append(arg)
            else:
                args_for_raylet.append(put(arg))

        # Increment the worker's task index to track how many tasks
        # have been submitted by the current task so far.
        self.task_context.task_index += 1
        # The parent task must be set for the submitted task.
        assert not self.current_task_id.is_nil()
        # Current driver id must not be nil when submitting a task.
        # Because every task must belong to a driver.
        assert not self.current_job_id.is_nil()

        # Submit the task to raylet.
        function_descriptor_list = (
            function_descriptor.get_function_descriptor_list())
        assert isinstance(job_id, JobID)

        # actor_creation_id and actor_id were normalized to nil IDs above,
        # so only the nil check is needed here.
        if not actor_creation_id.is_nil():
            # This is an actor creation task.
            task_id = TaskID.for_actor_creation_task(actor_creation_id)
        elif not actor_id.is_nil():
            # This is an actor task.
            task_id = TaskID.for_actor_task(
                self.current_job_id, self.current_task_id,
                self.task_context.task_index, actor_id)
        else:
            # Normal tasks are submitted through the core worker (in the
            # future, all tasks will be).
            return self.core_worker.submit_task(function_descriptor_list,
                                                args_for_raylet,
                                                num_return_vals, resources)

        # Actor creation tasks and actor tasks are submitted directly to
        # the raylet.
        task = ray._raylet.TaskSpec(
            task_id,
            job_id,
            function_descriptor_list,
            args_for_raylet,
            num_return_vals,
            self.current_task_id,
            self.task_context.task_index,
            actor_creation_id,
            actor_creation_dummy_object_id,
            previous_actor_task_dummy_object_id,
            max_actor_reconstructions,
            actor_id,
            actor_handle_id,
            actor_counter,
            new_actor_handles,
            resources,
            placement_resources,
        )
        self.raylet_client.submit_task(task)

        return task.returns()
def run_function_on_all_workers(self, function,
run_on_other_drivers=False):
"""Run arbitrary code on all of the workers.