Prototype distributed actor handles (#1137)

* Add actor handle ID to the task spec * Local scheduler dispatches actor tasks according to a task counter per handle * Fix python test * Allow passing actor handles into tasks. Not completely working yet. Also this is very messy. * Fixes, should be roughly working now. * Refactor actor handle wrapper * Fix __init__ tests * Terminate actor when the original handle goes out of scope * TODO and a couple test cases * Make tests for unsupported cases * Fix Python mode tests * Linting. * Cache actor definitions that occur before ray.init() is called. * Fix export actor class * Deterministically compute actor handle ID * Fix __getattribute__ * Fix string encoding for python3 * doc * Add comment and assertion.
2026-06-29 17:53:12 +08:00 · 2017-10-19 23:49:59 -07:00
parent 2f45ac9e95
commit af47737bd5
11 changed files with 799 additions and 406 deletions
@@ -26,16 +26,40 @@ def random_actor_class_id():
    return random_string()


-def get_actor_method_function_id(attr):
+def compute_actor_handle_id(actor_handle_id, num_forks):
+    """Deterministically comopute an actor handle ID.
+
+    A new actor handle ID is generated when it is forked from another actor
+    handle. The new handle ID is computed as hash(old_handle_id || num_forks).
+
+    Args:
+        actor_handle_id (common.ObjectID): The original actor handle ID.
+        num_forks: The number of times the original actor handle has been
+                   forked so far.
+
+    Returns:
+        An object ID for the new actor handle.
+    """
+    handle_id_hash = hashlib.sha1()
+    handle_id_hash.update(actor_handle_id.id())
+    handle_id_hash.update(str(num_forks).encode("ascii"))
+    handle_id = handle_id_hash.digest()
+    assert len(handle_id) == 20
+    return ray.local_scheduler.ObjectID(handle_id)
+
+
+def compute_actor_method_function_id(class_name, attr):
    """Get the function ID corresponding to an actor method.

    Args:
+        class_name (str): The class name of the actor.
        attr (str): The attribute name of the method.

    Returns:
        Function ID corresponding to the method.
    """
    function_id_hash = hashlib.sha1()
+    function_id_hash.update(class_name)
    function_id_hash.update(attr.encode("ascii"))
    function_id = function_id_hash.digest()
    assert len(function_id) == 20
@@ -203,19 +227,18 @@ def fetch_and_register_actor(actor_class_key, worker):
    def temporary_actor_method(*xs):
        raise Exception("The actor with name {} failed to be imported, and so "
                        "cannot execute this method".format(actor_name))
+    # Register the actor method signatures.
+    register_actor_signatures(worker, driver_id, class_name,
+                              actor_method_names)
+    # Register the actor method executors.
    for actor_method_name in actor_method_names:
-        function_id = get_actor_method_function_id(actor_method_name).id()
+        function_id = compute_actor_method_function_id(class_name,
+                                                       actor_method_name).id()
        temporary_executor = make_actor_method_executor(worker,
                                                        actor_method_name,
                                                        temporary_actor_method)
        worker.functions[driver_id][function_id] = (actor_method_name,
                                                    temporary_executor)
-        worker.function_properties[driver_id][function_id] = (
-            FunctionProperties(num_return_vals=2,
-                               num_cpus=1,
-                               num_gpus=0,
-                               num_custom_resource=0,
-                               max_calls=0))
        worker.num_task_executions[driver_id][function_id] = 0

    try:
@@ -226,7 +249,8 @@ def fetch_and_register_actor(actor_class_key, worker):
        # traceback and notify the scheduler of the failure.
        traceback_str = ray.worker.format_error_message(traceback.format_exc())
        # Log the error message.
-        worker.push_error_to_driver(driver_id, "register_actor", traceback_str,
+        worker.push_error_to_driver(driver_id, "register_actor_signatures",
+                                    traceback_str,
                                    data={"actor_id": actor_id_str})
        # TODO(rkn): In the future, it might make sense to have the worker exit
        # here. However, currently that would lead to hanging if someone calls
@@ -239,7 +263,8 @@ def fetch_and_register_actor(actor_class_key, worker):
            unpickled_class, predicate=(lambda x: (inspect.isfunction(x) or
                                                   inspect.ismethod(x))))
        for actor_method_name, actor_method in actor_methods:
-            function_id = get_actor_method_function_id(actor_method_name).id()
+            function_id = compute_actor_method_function_id(
+                class_name, actor_method_name).id()
            executor = make_actor_method_executor(worker, actor_method_name,
                                                  actor_method)
            worker.functions[driver_id][function_id] = (actor_method_name,
@@ -256,28 +281,86 @@ def fetch_and_register_actor(actor_class_key, worker):
        worker.local_scheduler_id = binary_to_hex(local_scheduler_id)


-def export_actor_class(class_id, Class, actor_method_names,
-                       checkpoint_interval, worker):
-    if worker.mode is None:
-        raise Exception("Actors cannot be created before Ray has been "
-                        "started. You can start Ray with 'ray.init()'.")
-    key = b"ActorClass:" + class_id
-    d = {"driver_id": worker.task_driver_id.id(),
-         "class_name": Class.__name__,
-         "module": Class.__module__,
-         "class": pickle.dumps(Class),
-         "checkpoint_interval": checkpoint_interval,
-         "actor_method_names": json.dumps(list(actor_method_names))}
-    worker.redis_client.hmset(key, d)
+def register_actor_signatures(worker, driver_id, class_name,
+                              actor_method_names):
+    """Register an actor's method signatures in the worker.
+
+    Args:
+        worker: The worker to register the signatures on.
+        driver_id: The ID of the driver that this actor is associated with.
+        actor_id: The ID of the actor.
+        actor_method_names: The names of the methods to register.
+    """
+    for actor_method_name in actor_method_names:
+        # TODO(rkn): When we create a second actor, we are probably overwriting
+        # the values from the first actor here. This may or may not be a
+        # problem.
+        function_id = compute_actor_method_function_id(class_name,
+                                                       actor_method_name).id()
+        # For now, all actor methods have 1 return value.
+        worker.function_properties[driver_id][function_id] = (
+            FunctionProperties(num_return_vals=2,
+                               num_cpus=1,
+                               num_gpus=0,
+                               num_custom_resource=0,
+                               max_calls=0))
+
+
+def publish_actor_class_to_key(key, actor_class_info, worker):
+    """Push an actor class definition to Redis.
+
+    The is factored out as a separate function because it is also called
+    on cached actor class definitions when a worker connects for the first
+    time.
+
+    Args:
+        key: The key to store the actor class info at.
+        actor_class_info: Information about the actor class.
+        worker: The worker to use to connect to Redis.
+    """
+    # We set the driver ID here because it may not have been available when the
+    # actor class was defined.
+    actor_class_info["driver_id"] = worker.task_driver_id.id()
+    worker.redis_client.hmset(key, actor_class_info)
    worker.redis_client.rpush("Exports", key)


-def export_actor(actor_id, class_id, actor_method_names, num_cpus, num_gpus,
-                 worker):
+def export_actor_class(class_id, Class, actor_method_names,
+                       checkpoint_interval, worker):
+    key = b"ActorClass:" + class_id
+    actor_class_info = {
+        "class_name": Class.__name__,
+        "module": Class.__module__,
+        "class": pickle.dumps(Class),
+        "checkpoint_interval": checkpoint_interval,
+        "actor_method_names": json.dumps(list(actor_method_names))}
+
+    if worker.mode is None:
+        # This means that 'ray.init()' has not been called yet and so we must
+        # cache the actor class definition and export it when 'ray.init()' is
+        # called.
+        assert worker.cached_remote_functions_and_actors is not None
+        worker.cached_remote_functions_and_actors.append(
+            ("actor", (key, actor_class_info)))
+        # This caching code path is currently not used because we only export
+        # actor class definitions lazily when we instantiate the actor for the
+        # first time.
+        assert False, "This should be unreachable."
+    else:
+        publish_actor_class_to_key(key, actor_class_info, worker)
+    # TODO(rkn): Currently we allow actor classes to be defined within tasks.
+    # I tried to disable this, but it may be necessary because of
+    # https://github.com/ray-project/ray/issues/1146.
+
+
+def export_actor(actor_id, class_id, class_name, actor_method_names, num_cpus,
+                 num_gpus, worker):
    """Export an actor to redis.

    Args:
-        actor_id: The ID of the actor.
+        actor_id (common.ObjectID): The ID of the actor.
+        class_id (str): A random ID for the actor class.
+        class_name (str): The actor class name.
        actor_method_names (list): A list of the names of this actor's methods.
        num_cpus (int): The number of CPUs that this actor requires.
        num_gpus (int): The number of GPUs that this actor requires.
@@ -286,23 +369,13 @@ def export_actor(actor_id, class_id, actor_method_names, num_cpus, num_gpus,
    if worker.mode is None:
        raise Exception("Actors cannot be created before Ray has been "
                        "started. You can start Ray with 'ray.init()'.")
-    key = b"Actor:" + actor_id.id()

-    # For now, all actor methods have 1 return value.
    driver_id = worker.task_driver_id.id()
-    for actor_method_name in actor_method_names:
-        # TODO(rkn): When we create a second actor, we are probably overwriting
-        # the values from the first actor here. This may or may not be a
-        # problem.
-        function_id = get_actor_method_function_id(actor_method_name).id()
-        worker.function_properties[driver_id][function_id] = (
-            FunctionProperties(num_return_vals=2,
-                               num_cpus=1,
-                               num_gpus=0,
-                               num_custom_resource=0,
-                               max_calls=0))
+    register_actor_signatures(worker, driver_id, class_name,
+                              actor_method_names)

    # Select a local scheduler for the actor.
+    key = b"Actor:" + actor_id.id()
    local_scheduler_id = select_local_scheduler(
        worker.task_driver_id.id(), ray.global_state.local_schedulers(),
        num_gpus, worker.redis_client)
@@ -311,6 +384,7 @@ def export_actor(actor_id, class_id, actor_method_names, num_cpus, num_gpus,
    # We must put the actor information in Redis before publishing the actor
    # notification so that when the newly created actor attempts to fetch the
    # information from Redis, it is already there.
+    driver_id = worker.task_driver_id.id()
    worker.redis_client.hmset(key, {"class_id": class_id,
                                    "driver_id": driver_id,
                                    "local_scheduler_id": local_scheduler_id,
@@ -326,6 +400,340 @@ def export_actor(actor_id, class_id, actor_method_names, num_cpus, num_gpus,
                                     worker.redis_client)


+# Create objects to wrap method invocations. This is done so that we can
+# invoke methods with actor.method.remote() instead of actor.method().
+class ActorMethod(object):
+    def __init__(self, actor, method_name):
+        self.actor = actor
+        self.method_name = method_name
+
+    def __call__(self, *args, **kwargs):
+        raise Exception("Actor methods cannot be called directly. Instead "
+                        "of running 'object.{}()', try "
+                        "'object.{}.remote()'."
+                        .format(self.method_name, self.method_name))
+
+    def remote(self, *args, **kwargs):
+        return self.actor._actor_method_call(
+            self.method_name, args=args, kwargs=kwargs,
+            dependency=self.actor._ray_actor_cursor)
+
+
+# Checkpoint methods do not take in the state of the previous actor method
+# as an explicit data dependency.
+class CheckpointMethod(ActorMethod):
+    def remote(self):
+        # A checkpoint's arguments are the current task counter and the
+        # object ID of the preceding task. The latter is an implicit data
+        # dependency, since the checkpoint method can run at any time.
+        args = [self.actor._ray_actor_counter,
+                [self.actor._ray_actor_cursor]]
+        return self.actor._actor_method_call(self.method_name, args=args)
+
+
+class ActorHandleWrapper(object):
+    """A wrapper for the contents of an ActorHandle.
+
+    This is essentially just a dictionary, but it is used so that the recipient
+    can tell that an argument is an ActorHandle.
+    """
+    def __init__(self, actor_id, actor_handle_id, actor_cursor, actor_counter,
+                 actor_method_names, method_signatures, checkpoint_interval,
+                 class_name):
+        self.actor_id = actor_id
+        self.actor_handle_id = actor_handle_id
+        self.actor_cursor = actor_cursor
+        self.actor_counter = actor_counter
+        self.actor_method_names = actor_method_names
+        # TODO(swang): Fetch this information from Redis so that we don't have
+        # to fall back to pickle.
+        self.method_signatures = method_signatures
+        self.checkpoint_interval = checkpoint_interval
+        self.class_name = class_name
+
+
+def wrap_actor_handle(actor_handle):
+    """Wrap the ActorHandle to store the fields.
+
+    Args:
+        actor_handle: The ActorHandle instance to wrap.
+
+    Returns:
+        An ActorHandleWrapper instance that stores the ActorHandle's fields.
+    """
+    if actor_handle._ray_checkpoint_interval > 0:
+        raise Exception("Checkpointing not yet supported for distributed "
+                        "actor handles.")
+    wrapper = ActorHandleWrapper(
+        actor_handle._ray_actor_id,
+        compute_actor_handle_id(actor_handle._ray_actor_handle_id,
+                                actor_handle._ray_actor_forks),
+        actor_handle._ray_actor_cursor,
+        0,  # Reset the actor counter.
+        actor_handle._ray_actor_method_names,
+        actor_handle._ray_method_signatures,
+        actor_handle._ray_checkpoint_interval,
+        actor_handle._ray_class_name)
+    actor_handle._ray_actor_forks += 1
+    return wrapper
+
+
+def unwrap_actor_handle(worker, wrapper):
+    """Make an ActorHandle from the stored fields.
+
+    Args:
+        worker: The worker that is unwrapping the actor handle.
+        wrapper: An ActorHandleWrapper instance to unwrap.
+
+    Returns:
+        The unwrapped ActorHandle instance.
+    """
+    driver_id = worker.task_driver_id.id()
+    register_actor_signatures(worker, driver_id, wrapper.class_name,
+                              wrapper.actor_method_names)
+
+    actor_handle_class = make_actor_handle_class(wrapper.class_name)
+    actor_object = actor_handle_class.__new__(actor_handle_class)
+    actor_object._manual_init(
+        wrapper.actor_id,
+        wrapper.actor_handle_id,
+        wrapper.actor_cursor,
+        wrapper.actor_counter,
+        wrapper.actor_method_names,
+        wrapper.method_signatures,
+        wrapper.checkpoint_interval)
+    return actor_object
+
+
+class ActorHandleParent(object):
+    """This is the parent class of all ActorHandle classes.
+
+    This enables us to identify actor handles by checking if an object obj
+    satisfies isinstance(obj, ActorHandleParent).
+    """
+    pass
+
+
+def make_actor_handle_class(class_name):
+    class ActorHandle(ActorHandleParent):
+        def __init__(self, *args, **kwargs):
+            raise Exception("Actor classes cannot be instantiated directly. "
+                            "Instead of running '{}()', try '{}.remote()'."
+                            .format(class_name, class_name))
+
+        @classmethod
+        def remote(cls, *args, **kwargs):
+            raise NotImplementedError("The classmethod remote() can only be "
+                                      "called on the original Class.")
+
+        def _manual_init(self, actor_id, actor_handle_id, actor_cursor,
+                         actor_counter, actor_method_names, method_signatures,
+                         checkpoint_interval):
+            self._ray_actor_id = actor_id
+            self._ray_actor_handle_id = actor_handle_id
+            self._ray_actor_cursor = actor_cursor
+            self._ray_actor_counter = actor_counter
+            self._ray_actor_method_names = actor_method_names
+            self._ray_method_signatures = method_signatures
+            self._ray_checkpoint_interval = checkpoint_interval
+            self._ray_class_name = class_name
+            self._ray_actor_forks = 0
+
+        def _actor_method_call(self, method_name, args=None, kwargs=None,
+                               dependency=None):
+            """Method execution stub for an actor handle.
+
+            This is the function that executes when
+            `actor.method_name.remote(*args, **kwargs)` is called. Instead of
+            executing locally, the method is packaged as a task and scheduled
+            to the remote actor instance.
+
+            Args:
+                self: The local actor handle.
+                method_name: The name of the actor method to execute.
+                args: A list of arguments for the actor method.
+                kwargs: A dictionary of keyword arguments for the actor method.
+                dependency: The object ID that this method is dependent on.
+                    Defaults to None, for no dependencies. Most tasks should
+                    pass in the dummy object returned by the preceding task.
+                    Some tasks, such as checkpoint and terminate methods, have
+                    no dependencies.
+
+            Returns:
+                object_ids: A list of object IDs returned by the remote actor
+                    method.
+            """
+            ray.worker.check_connected()
+            ray.worker.check_main_thread()
+            function_signature = self._ray_method_signatures[method_name]
+            if args is None:
+                args = []
+            if kwargs is None:
+                kwargs = {}
+            args = signature.extend_args(function_signature, args, kwargs)
+
+            # Execute functions locally if Ray is run in PYTHON_MODE
+            # Copy args to prevent the function from mutating them.
+            if ray.worker.global_worker.mode == ray.PYTHON_MODE:
+                return getattr(
+                    ray.worker.global_worker.actors[self._ray_actor_id],
+                    method_name)(*copy.deepcopy(args))
+
+            # Add the dummy argument that represents dependency on a preceding
+            # task.
+            args.append(dependency)
+
+            is_actor_checkpoint_method = (method_name == "__ray_checkpoint__")
+
+            function_id = compute_actor_method_function_id(
+                self._ray_class_name, method_name)
+            object_ids = ray.worker.global_worker.submit_task(
+                function_id, args, actor_id=self._ray_actor_id,
+                actor_handle_id=self._ray_actor_handle_id,
+                actor_counter=self._ray_actor_counter,
+                is_actor_checkpoint_method=is_actor_checkpoint_method)
+            # Update the actor counter and cursor to reflect the most recent
+            # invocation.
+            self._ray_actor_counter += 1
+            self._ray_actor_cursor = object_ids.pop()
+
+            # Submit a checkpoint task if it is time to do so.
+            if (self._ray_checkpoint_interval > 1 and
+                    self._ray_actor_counter % self._ray_checkpoint_interval ==
+                    0):
+                self.__ray_checkpoint__.remote()
+
+            # The last object returned is the dummy object that should be
+            # passed in to the next actor method. Do not return it to the user.
+            if len(object_ids) == 1:
+                return object_ids[0]
+            elif len(object_ids) > 1:
+                return object_ids
+
+        # Make tab completion work.
+        def __dir__(self):
+            return self._ray_actor_method_names
+
+        def __getattribute__(self, attr):
+            try:
+                # Check whether this is an actor method.
+                actor_method_names = object.__getattribute__(
+                    self, "_ray_actor_method_names")
+                if attr in actor_method_names:
+                    # We create the ActorMethod on the fly here so that the
+                    # ActorHandle doesn't need a reference to the ActorMethod.
+                    # The ActorMethod has a reference to the ActorHandle and
+                    # this was causing cyclic references which were prevent
+                    # object deallocation from behaving in a predictable
+                    # manner.
+                    if attr == "__ray_checkpoint__":
+                        actor_method_cls = CheckpointMethod
+                    else:
+                        actor_method_cls = ActorMethod
+                    return actor_method_cls(self, attr)
+            except AttributeError:
+                pass
+
+            # If the requested attribute is not a registered method, fall back
+            # to default __getattribute__.
+            return object.__getattribute__(self, attr)
+
+        def __repr__(self):
+            return "Actor(" + self._ray_actor_id.hex() + ")"
+
+        def __reduce__(self):
+            raise Exception("Actor objects cannot be pickled.")
+
+        def __del__(self):
+            """Kill the worker that is running this actor."""
+            # TODO(swang): Also clean up forked actor handles.
+            # Kill the worker if this is the original actor handle, created
+            # with Class.remote().
+            if (ray.worker.global_worker.connected and
+                    self._ray_actor_handle_id.id() == ray.worker.NIL_ACTOR_ID):
+                self._actor_method_call("__ray_terminate__",
+                                        args=[self._ray_actor_id.id()])
+
+    return ActorHandle
+
+
+def actor_handle_from_class(Class, class_id, num_cpus, num_gpus,
+                            checkpoint_interval):
+    class_name = Class.__name__.encode("ascii")
+    actor_handle_class = make_actor_handle_class(class_name)
+    exported = []
+
+    class ActorHandle(actor_handle_class):
+
+        @classmethod
+        def remote(cls, *args, **kwargs):
+            actor_id = random_actor_id()
+            # The ID for this instance of ActorHandle. These should be unique
+            # across instances with the same _ray_actor_id.
+            actor_handle_id = ray.local_scheduler.ObjectID(
+                ray.worker.NIL_ACTOR_ID)
+            # The actor cursor is a dummy object representing the most recent
+            # actor method invocation. For each subsequent method invocation,
+            # the current cursor should be added as a dependency, and then
+            # updated to reflect the new invocation.
+            actor_cursor = None
+            # The number of actor method invocations that we've called so far.
+            actor_counter = 0
+            # Get the actor methods of the given class.
+            actor_methods = inspect.getmembers(
+                Class, predicate=(lambda x: (inspect.isfunction(x) or
+                                             inspect.ismethod(x))))
+            # Extract the signatures of each of the methods. This will be used
+            # to catch some errors if the methods are called with inappropriate
+            # arguments.
+            method_signatures = dict()
+            for k, v in actor_methods:
+                # Print a warning message if the method signature is not
+                # supported. We don't raise an exception because if the actor
+                # inherits from a class that has a method whose signature we
+                # don't support, we there may not be much the user can do about
+                # it.
+                signature.check_signature_supported(v, warn=True)
+                method_signatures[k] = signature.extract_signature(
+                    v, ignore_first=True)
+
+            actor_method_names = [method_name for method_name, _ in
+                                  actor_methods]
+            # Do not export the actor class or the actor if run in PYTHON_MODE
+            # Instead, instantiate the actor locally and add it to
+            # global_worker's dictionary
+            if ray.worker.global_worker.mode == ray.PYTHON_MODE:
+                ray.worker.global_worker.actors[actor_id] = (
+                    Class.__new__(Class))
+            else:
+                # Export the actor.
+                if not exported:
+                    export_actor_class(class_id, Class, actor_method_names,
+                                       checkpoint_interval,
+                                       ray.worker.global_worker)
+                    exported.append(0)
+                export_actor(actor_id, class_id, class_name,
+                             actor_method_names, num_cpus, num_gpus,
+                             ray.worker.global_worker)
+
+            # Instantiate the actor handle.
+            actor_object = cls.__new__(cls)
+            actor_object._manual_init(actor_id, actor_handle_id, actor_cursor,
+                                      actor_counter, actor_method_names,
+                                      method_signatures, checkpoint_interval)
+
+            # Call __init__ as a remote function.
+            if "__init__" in actor_object._ray_actor_method_names:
+                actor_object._actor_method_call("__init__", args=args,
+                                                kwargs=kwargs)
+            else:
+                print("WARNING: this object has no __init__ method.")
+
+            return actor_object
+
+    return ActorHandle
+
+
 def make_actor(cls, num_cpus, num_gpus, checkpoint_interval):
    if checkpoint_interval == 0:
        raise Exception("checkpoint_interval must be greater than 0.")
@@ -472,216 +880,9 @@ def make_actor(cls, num_cpus, num_gpus, checkpoint_interval):
    Class.__name__ = cls.__name__

    class_id = random_actor_class_id()
-    # The list exported will have length 0 if the class has not been exported
-    # yet, and length one if it has. This is just implementing a bool, but we
-    # don't use a bool because we need to modify it inside of the ActorHandle
-    # constructor.
-    exported = []

-    # Create objects to wrap method invocations. This is done so that we can
-    # invoke methods with actor.method.remote() instead of actor.method().
-    class ActorMethod(object):
-        def __init__(self, actor, method_name):
-            self.actor = actor
-            self.method_name = method_name
-
-        def __call__(self, *args, **kwargs):
-            raise Exception("Actor methods cannot be called directly. Instead "
-                            "of running 'object.{}()', try "
-                            "'object.{}.remote()'."
-                            .format(self.method_name, self.method_name))
-
-        def remote(self, *args, **kwargs):
-            return self.actor._actor_method_call(
-                self.method_name, args=args, kwargs=kwargs,
-                dependency=self.actor._ray_actor_cursor)
-
-    # Checkpoint methods do not take in the state of the previous actor method
-    # as an explicit data dependency.
-    class CheckpointMethod(ActorMethod):
-        def remote(self):
-            # A checkpoint's arguments are the current task counter and the
-            # object ID of the preceding task. The latter is an implicit data
-            # dependency, since the checkpoint method can run at any time.
-            args = [self.actor._ray_actor_counter,
-                    [self.actor._ray_actor_cursor]]
-            return self.actor._actor_method_call(self.method_name, args=args)
-
-    class ActorHandle(object):
-        def __init__(self, *args, **kwargs):
-            raise Exception("Actor classes cannot be instantiated directly. "
-                            "Instead of running '{}()', try '{}.remote()'."
-                            .format(Class.__name__, Class.__name__))
-
-        @classmethod
-        def remote(cls, *args, **kwargs):
-            actor_object = cls.__new__(cls)
-            actor_object._manual_init(*args, **kwargs)
-            return actor_object
-
-        def _manual_init(self, *args, **kwargs):
-            self._ray_actor_id = random_actor_id()
-            # The number of actor method invocations that we've called so far.
-            self._ray_actor_counter = 0
-            # The actor cursor is a dummy object representing the most recent
-            # actor method invocation. For each subsequent method invocation,
-            # the current cursor should be added as a dependency, and then
-            # updated to reflect the new invocation.
-            self._ray_actor_cursor = None
-            ray_actor_methods = inspect.getmembers(
-                Class, predicate=(lambda x: (inspect.isfunction(x) or
-                                             inspect.ismethod(x))))
-            self._ray_actor_methods = {}
-            for actor_method_name, actor_method in ray_actor_methods:
-                self._ray_actor_methods[actor_method_name] = actor_method
-            # Extract the signatures of each of the methods. This will be used
-            # to catch some errors if the methods are called with inappropriate
-            # arguments.
-            self._ray_method_signatures = dict()
-            for k, v in self._ray_actor_methods.items():
-                # Print a warning message if the method signature is not
-                # supported. We don't raise an exception because if the actor
-                # inherits from a class that has a method whose signature we
-                # don't support, we there may not be much the user can do about
-                # it.
-                signature.check_signature_supported(v, warn=True)
-                self._ray_method_signatures[k] = signature.extract_signature(
-                    v, ignore_first=True)
-
-            # Do not export the actor class or the actor if run in PYTHON_MODE
-            # Instead, instantiate the actor locally and add it to
-            # global_worker's dictionary
-            if ray.worker.global_worker.mode == ray.PYTHON_MODE:
-                ray.worker.global_worker.actors[self._ray_actor_id] = (
-                    Class.__new__(Class))
-            else:
-                # Export the actor class if it has not been exported yet.
-                if len(exported) == 0:
-                    export_actor_class(class_id, Class,
-                                       self._ray_actor_methods.keys(),
-                                       checkpoint_interval,
-                                       ray.worker.global_worker)
-                    exported.append(0)
-                # Export the actor.
-                export_actor(self._ray_actor_id, class_id,
-                             self._ray_actor_methods.keys(), num_cpus,
-                             num_gpus, ray.worker.global_worker)
-
-            # Call __init__ as a remote function.
-            if "__init__" in self._ray_actor_methods.keys():
-                self._actor_method_call("__init__", args=args, kwargs=kwargs)
-            else:
-                print("WARNING: this object has no __init__ method.")
-
-        def _actor_method_call(self, method_name, args=None, kwargs=None,
-                               dependency=None):
-            """Method execution stub for an actor handle.
-
-            This is the function that executes when
-            `actor.method_name.remote(*args, **kwargs)` is called. Instead of
-            executing locally, the method is packaged as a task and scheduled
-            to the remote actor instance.
-
-            Args:
-                self: The local actor handle.
-                method_name: The name of the actor method to execute.
-                args: A list of arguments for the actor method.
-                kwargs: A dictionary of keyword arguments for the actor method.
-                dependency: The object ID that this method is dependent on.
-                    Defaults to None, for no dependencies. Most tasks should
-                    pass in the dummy object returned by the preceding task.
-                    Some tasks, such as checkpoint and terminate methods, have
-                    no dependencies.
-
-            Returns:
-                object_ids: A list of object IDs returned by the remote actor
-                    method.
-            """
-            ray.worker.check_connected()
-            ray.worker.check_main_thread()
-            function_signature = self._ray_method_signatures[method_name]
-            if args is None:
-                args = []
-            if kwargs is None:
-                kwargs = {}
-            args = signature.extend_args(function_signature, args, kwargs)
-
-            # Execute functions locally if Ray is run in PYTHON_MODE
-            # Copy args to prevent the function from mutating them.
-            if ray.worker.global_worker.mode == ray.PYTHON_MODE:
-                return getattr(
-                    ray.worker.global_worker.actors[self._ray_actor_id],
-                    method_name)(*copy.deepcopy(args))
-
-            # Add the dummy argument that represents dependency on a preceding
-            # task.
-            args.append(dependency)
-
-            is_actor_checkpoint_method = (method_name == "__ray_checkpoint__")
-
-            function_id = get_actor_method_function_id(method_name)
-            object_ids = ray.worker.global_worker.submit_task(
-                function_id, args, actor_id=self._ray_actor_id,
-                actor_counter=self._ray_actor_counter,
-                is_actor_checkpoint_method=is_actor_checkpoint_method)
-            # Update the actor counter and cursor to reflect the most recent
-            # invocation.
-            self._ray_actor_counter += 1
-            self._ray_actor_cursor = object_ids.pop()
-
-            # Submit a checkpoint task if it is time to do so.
-            if (checkpoint_interval > 1 and
-                    self._ray_actor_counter % checkpoint_interval == 0):
-                self.__ray_checkpoint__.remote()
-
-            # The last object returned is the dummy object that should be
-            # passed in to the next actor method. Do not return it to the user.
-            if len(object_ids) == 1:
-                return object_ids[0]
-            elif len(object_ids) > 1:
-                return object_ids
-
-        # Make tab completion work.
-        def __dir__(self):
-            return self._ray_actor_methods
-
-        def __getattribute__(self, attr):
-            # The following is needed so we can still access
-            # self.actor_methods.
-            if attr in ["_manual_init", "_ray_actor_id", "_ray_actor_counter",
-                        "_ray_actor_cursor", "_ray_actor_methods",
-                        "_actor_method_invokers", "_ray_method_signatures",
-                        "_actor_method_call"]:
-                return object.__getattribute__(self, attr)
-            if attr in self._ray_actor_methods.keys():
-                # We create the ActorMethod on the fly here so that the
-                # ActorHandle doesn't need a reference to the ActorMethod. The
-                # ActorMethod has a reference to the ActorHandle and this was
-                # causing cyclic references which were prevent object
-                # deallocation from behaving in a predictable manner.
-                if attr == "__ray_checkpoint__":
-                    actor_method_cls = CheckpointMethod
-                else:
-                    actor_method_cls = ActorMethod
-                return actor_method_cls(self, attr)
-            else:
-                # There is no method with this name, so raise an exception.
-                raise AttributeError("'{}' Actor object has no attribute '{}'"
-                                     .format(Class, attr))
-
-        def __repr__(self):
-            return "Actor(" + self._ray_actor_id.hex() + ")"
-
-        def __reduce__(self):
-            raise Exception("Actor objects cannot be pickled.")
-
-        def __del__(self):
-            """Kill the worker that is running this actor."""
-            if ray.worker.global_worker.connected:
-                self._actor_method_call("__ray_terminate__",
-                                        args=[self._ray_actor_id.id()])
-
-    return ActorHandle
+    return actor_handle_from_class(Class, class_id, num_cpus, num_gpus,
+                                   checkpoint_interval)


 ray.worker.global_worker.fetch_and_register_actor = fetch_and_register_actor
@@ -170,6 +170,7 @@ class TestGlobalScheduler(unittest.TestCase):
        task2 = local_scheduler.Task(random_driver_id(), random_function_id(),
                                     [random_object_id()], 0, random_task_id(),
                                     0, local_scheduler.ObjectID(NIL_ACTOR_ID),
+                                     local_scheduler.ObjectID(NIL_ACTOR_ID),
                                     0, 0, [1.0, 2.0, 0.0])
        self.assertEqual(task2.required_resources(), [1.0, 2.0, 0.0])

@@ -184,14 +184,12 @@ class Worker(object):
        connected (bool): True if Ray has been started and False otherwise.
        mode: The mode of the worker. One of SCRIPT_MODE, PYTHON_MODE,
            SILENT_MODE, and WORKER_MODE.
-        cached_remote_functions (List[Tuple[str, str]]): A list of pairs
-            representing the remote functions that were defined before the
-            worker called connect. The first element is the name of the remote
-            function, and the second element is the serialized remote function.
-            When the worker eventually does call connect, if it is a driver, it
-            will export these functions to the scheduler. If
-            cached_remote_functions is None, that means that connect has been
-            called already.
+        cached_remote_functions_and_actors: A list of information for exporting
+            remote functions and actor classes definitions that were defined
+            before the worker called connect. When the worker eventually does
+            call connect, if it is a driver, it will export these functions and
+            actors. If cached_remote_functions_and_actors is None, that means
+            that connect has been called already.
        cached_functions_to_run (List): A list of functions to run on all of
            the workers that should be exported as soon as connect is called.
    """
@@ -221,7 +219,7 @@ class Worker(object):
        self.num_task_executions = collections.defaultdict(lambda: {})
        self.connected = False
        self.mode = None
-        self.cached_remote_functions = []
+        self.cached_remote_functions_and_actors = []
        self.cached_functions_to_run = []
        self.fetch_and_register_actor = None
        self.make_actor = None
@@ -454,7 +452,8 @@ class Worker(object):
        assert len(final_results) == len(object_ids)
        return final_results

-    def submit_task(self, function_id, args, actor_id=None, actor_counter=0,
+    def submit_task(self, function_id, args, actor_id=None,
+                    actor_handle_id=None, actor_counter=0,
                    is_actor_checkpoint_method=False):
        """Submit a remote task to the scheduler.

@@ -474,14 +473,21 @@ class Worker(object):
        """
        with log_span("ray:submit_task", worker=self):
            check_main_thread()
-            actor_id = (ray.local_scheduler.ObjectID(NIL_ACTOR_ID)
-                        if actor_id is None else actor_id)
+            if actor_id is None:
+                assert actor_handle_id is None
+                actor_id = ray.local_scheduler.ObjectID(NIL_ACTOR_ID)
+                actor_handle_id = ray.local_scheduler.ObjectID(NIL_ACTOR_ID)
+            else:
+                assert actor_handle_id is not None
            # Put large or complex arguments that are passed by value in the
            # object store first.
            args_for_local_scheduler = []
            for arg in args:
                if isinstance(arg, ray.local_scheduler.ObjectID):
                    args_for_local_scheduler.append(arg)
+                elif isinstance(arg, ray.actor.ActorHandleParent):
+                    args_for_local_scheduler.append(put(
+                        ray.actor.wrap_actor_handle(arg)))
                elif ray.local_scheduler.check_simple_value(arg):
                    args_for_local_scheduler.append(arg)
                else:
@@ -500,6 +506,7 @@ class Worker(object):
                self.current_task_id,
                self.task_index,
                actor_id,
+                actor_handle_id,
                actor_counter,
                is_actor_checkpoint_method,
                [function_properties.num_cpus, function_properties.num_gpus,
@@ -655,6 +662,8 @@ class Worker(object):
                    # created this object failed, and we should propagate the
                    # error message here.
                    raise RayGetArgumentError(function_name, i, arg, argument)
+                elif isinstance(argument, ray.actor.ActorHandleWrapper):
+                    argument = ray.actor.unwrap_actor_handle(self, argument)
            else:
                # pass the argument by value
                argument = arg
@@ -1098,6 +1107,8 @@ def _initialize_serialization(worker=global_worker):
        _register_class(type(lambda: 0), use_pickle=True)
        # Tell Ray to serialize types with pickle.
        _register_class(type(int), use_pickle=True)
+        # Ray can serialize actor handles that have been wrapped.
+        _register_class(ray.actor.ActorHandleWrapper)


 def get_address_info_from_redis_helper(redis_address, node_ip_address):
@@ -1704,7 +1715,7 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker,
    error_message = "Perhaps you called ray.init twice by accident?"
    assert not worker.connected, error_message
    assert worker.cached_functions_to_run is not None, error_message
-    assert worker.cached_remote_functions is not None, error_message
+    assert worker.cached_remote_functions_and_actors is not None, error_message
    # Initialize some fields.
    worker.worker_id = random_string()
    worker.actor_id = actor_id
@@ -1840,6 +1851,7 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker,
            worker.current_task_id,
            worker.task_index,
            ray.local_scheduler.ObjectID(NIL_ACTOR_ID),
+            ray.local_scheduler.ObjectID(NIL_ACTOR_ID),
            nil_actor_counter,
            False,
            [0, 0, 0])
@@ -1913,23 +1925,32 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker,
        for function in worker.cached_functions_to_run:
            worker.run_function_on_all_workers(function)
        # Export cached remote functions to the workers.
-        for info in worker.cached_remote_functions:
-            (function_id, func_name, func,
-             func_invoker, function_properties) = info
-            export_remote_function(function_id, func_name, func, func_invoker,
-                                   function_properties, worker)
+        for cached_type, info in worker.cached_remote_functions_and_actors:
+            if cached_type == "remote_function":
+                (function_id, func_name, func,
+                 func_invoker, function_properties) = info
+                export_remote_function(function_id, func_name, func,
+                                       func_invoker, function_properties,
+                                       worker)
+            elif cached_type == "actor":
+                (key, actor_class_info) = info
+                ray.actor.publish_actor_class_to_key(key, actor_class_info,
+                                                     worker)
+            else:
+                assert False, "This code should be unreachable."
    worker.cached_functions_to_run = None
-    worker.cached_remote_functions = None
+    worker.cached_remote_functions_and_actors = None


 def disconnect(worker=global_worker):
    """Disconnect this worker from the scheduler and object store."""
-    # Reset the list of cached remote functions so that if more remote
-    # functions are defined and then connect is called again, the remote
-    # functions will be exported. This is mostly relevant for the tests.
+    # Reset the list of cached remote functions and actors so that if more
+    # remote functions or actors are defined and then connect is called again,
+    # the remote functions will be exported. This is mostly relevant for the
+    # tests.
    worker.connected = False
    worker.cached_functions_to_run = []
-    worker.cached_remote_functions = []
+    worker.cached_remote_functions_and_actors = []
    worker.serialization_context = pyarrow.SerializationContext()


@@ -2381,9 +2402,9 @@ def remote(*args, **kwargs):
                export_remote_function(function_id, func_name, func,
                                       func_invoker, function_properties)
            elif worker.mode is None:
-                worker.cached_remote_functions.append((function_id, func_name,
-                                                       func, func_invoker,
-                                                       function_properties))
+                worker.cached_remote_functions_and_actors.append(
+                    ("remote_function", (function_id, func_name, func,
+                                         func_invoker, function_properties)))
            return func_invoker

        return remote_decorator