[xray] Integrate worker.py with raylet. (#1810)

* Integrate worker with raylet.
* Begin allowing worker to attach to cluster.
* Fix linting and documentation.
* Fix linting.
* Comment tests back in.
* Fix type of worker command.
* Remove xray python files and tests.
* Fix from rebase.
* Add test.
* Copy over raylet executable.
* Small cleanup.
2026-06-28 13:02:16 +08:00 · 2018-04-03 02:38:56 -07:00
parent 0fc989c6c1
commit fbfbb1c079
22 changed files with 459 additions and 506 deletions
@@ -86,11 +86,13 @@ def cli():
              help="enable support for huge pages in the object store")
@click.option("--autoscaling-config", required=False, type=str,
              help="the file that contains the autoscaling config")
+@click.option("--use-raylet", is_flag=True, default=False,
+              help="use the raylet code path, this is not supported yet")
 def start(node_ip_address, redis_address, redis_port, num_redis_shards,
          redis_max_clients, redis_shard_ports, object_manager_port,
          object_store_memory, num_workers, num_cpus, num_gpus, resources,
          head, no_ui, block, plasma_directory, huge_pages,
-          autoscaling_config):
+          autoscaling_config, use_raylet):
    # Convert hostnames to numerical IP address.
    if node_ip_address is not None:
        node_ip_address = services.address_to_ip(node_ip_address)
@@ -161,7 +163,8 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
            include_webui=(not no_ui),
            plasma_directory=plasma_directory,
            huge_pages=huge_pages,
-            autoscaling_config=autoscaling_config)
+            autoscaling_config=autoscaling_config,
+            use_raylet=use_raylet)
        print(address_info)
        print("\nStarted Ray on this node. You can add additional nodes to "
              "the cluster by calling\n\n"
@@ -227,7 +230,8 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
            redirect_output=True,
            resources=resources,
            plasma_directory=plasma_directory,
-            huge_pages=huge_pages)
+            huge_pages=huge_pages,
+            use_raylet=use_raylet)
        print(address_info)
        print("\nStarted Ray on this node. If you wish to terminate the "
              "processes that have been started, run\n\n"
@@ -242,7 +246,7 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
@click.command()
 def stop():
    subprocess.call(["killall global_scheduler plasma_store plasma_manager "
-                     "local_scheduler"], shell=True)
+                     "local_scheduler raylet"], shell=True)

    # Find the PID of the monitor process and kill it.
    subprocess.call(["kill $(ps aux | grep monitor.py | grep -v grep | "
@@ -28,6 +28,7 @@ import ray.global_scheduler as global_scheduler
 PROCESS_TYPE_MONITOR = "monitor"
 PROCESS_TYPE_LOG_MONITOR = "log_monitor"
 PROCESS_TYPE_WORKER = "worker"
+PROCESS_TYPE_RAYLET = "raylet"
 PROCESS_TYPE_LOCAL_SCHEDULER = "local_scheduler"
 PROCESS_TYPE_PLASMA_MANAGER = "plasma_manager"
 PROCESS_TYPE_PLASMA_STORE = "plasma_store"
@@ -43,6 +44,7 @@ PROCESS_TYPE_WEB_UI = "web_ui"
 all_processes = OrderedDict([(PROCESS_TYPE_MONITOR, []),
                             (PROCESS_TYPE_LOG_MONITOR, []),
                             (PROCESS_TYPE_WORKER, []),
+                             (PROCESS_TYPE_RAYLET, []),
                             (PROCESS_TYPE_LOCAL_SCHEDULER, []),
                             (PROCESS_TYPE_PLASMA_MANAGER, []),
                             (PROCESS_TYPE_PLASMA_STORE, []),
@@ -51,6 +53,7 @@ all_processes = OrderedDict([(PROCESS_TYPE_MONITOR, []),
                             (PROCESS_TYPE_WEB_UI, [])],)

 # True if processes are run in the valgrind profiler.
+RUN_RAYLET_PROFILER = False
 RUN_LOCAL_SCHEDULER_PROFILER = False
 RUN_PLASMA_MANAGER_PROFILER = False
 RUN_PLASMA_STORE_PROFILER = False
@@ -74,6 +77,10 @@ CREDIS_MEMBER_MODULE = os.path.join(
    os.path.abspath(os.path.dirname(__file__)),
    "core/src/credis/build/src/libmember.so")

+# Location of the raylet executable.
+RAYLET_EXECUTABLE = os.path.join(
+    os.path.abspath(os.path.dirname(__file__)),
+    "core/src/ray/raylet/raylet")

 # ObjectStoreAddress tuples contain all information necessary to connect to an
 # object store. The fields are:
@@ -123,8 +130,8 @@ def kill_process(p):
    if p.poll() is not None:
        # The process has already terminated.
        return True
-    if any([RUN_LOCAL_SCHEDULER_PROFILER, RUN_PLASMA_MANAGER_PROFILER,
-            RUN_PLASMA_STORE_PROFILER]):
+    if any([RUN_RAYLET_PROFILER, RUN_LOCAL_SCHEDULER_PROFILER,
+            RUN_PLASMA_MANAGER_PROFILER, RUN_PLASMA_STORE_PROFILER]):
        # Give process signal to write profiler data.
        os.kill(p.pid, signal.SIGINT)
        # Wait for profiling data to be written.
@@ -860,12 +867,73 @@ def start_local_scheduler(redis_address,
    return local_scheduler_name


+def start_raylet(redis_address,
+                 node_ip_address,
+                 plasma_store_name,
+                 worker_path,
+                 stdout_file=None,
+                 stderr_file=None,
+                 cleanup=True):
+    """Start a raylet, which is a combined local scheduler and object manager.
+
+    Args:
+        redis_address (str): The address of the Redis instance.
+        node_ip_address (str): The IP address of the node that this raylet
+            is running on.
+        plasma_store_name (str): The name of the plasma store socket to connect
+            to.
+        worker_path (str): The path of the script to use when the raylet
+            starts up new workers.
+        stdout_file: A file handle opened for writing to redirect stdout to. If
+            no redirection should happen, then this should be None.
+        stderr_file: A file handle opened for writing to redirect stderr to. If
+            no redirection should happen, then this should be None.
+        cleanup (bool): True if using Ray in local mode. If cleanup is true,
+            then this process will be killed by services.cleanup() when the
+            Python process that imported services exits.
+
+    Returns:
+        The raylet socket name.
+    """
+    gcs_ip_address, gcs_port = redis_address.split(":")
+    raylet_name = "/tmp/raylet{}".format(random_name())
+
+    # Create the command that the Raylet will use to start workers.
+    start_worker_command = ("{} {} "
+                            "--node-ip-address={} "
+                            "--object-store-name={} "
+                            "--raylet-name={} "
+                            "--redis-address={}"
+                            .format(sys.executable,
+                                    worker_path,
+                                    node_ip_address,
+                                    plasma_store_name,
+                                    raylet_name,
+                                    redis_address))
+
+    command = [RAYLET_EXECUTABLE,
+               raylet_name,
+               plasma_store_name,
+               node_ip_address,
+               gcs_ip_address,
+               gcs_port,
+               start_worker_command]
+    pid = subprocess.Popen(command, stdout=stdout_file, stderr=stderr_file)
+
+    if cleanup:
+        all_processes[PROCESS_TYPE_RAYLET].append(pid)
+    record_log_files_in_redis(redis_address, node_ip_address,
+                              [stdout_file, stderr_file])
+
+    return raylet_name
+
+
 def start_objstore(node_ip_address, redis_address,
                   object_manager_port=None, store_stdout_file=None,
                   store_stderr_file=None, manager_stdout_file=None,
                   manager_stderr_file=None, objstore_memory=None,
                   cleanup=True, plasma_directory=None,
-                   huge_pages=False):
+                   huge_pages=False, use_raylet=False):
    """This method starts an object store process.

    Args:
@@ -893,6 +961,8 @@ def start_objstore(node_ip_address, redis_address,
            be created.
        huge_pages: Boolean flag indicating whether to start the Object
            Store with hugetlbfs support. Requires plasma_directory.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.

    Return:
        A tuple of the Plasma store socket name, the Plasma manager socket
@@ -936,33 +1006,41 @@ def start_objstore(node_ip_address, redis_address,
        plasma_directory=plasma_directory,
        huge_pages=huge_pages)
    # Start the plasma manager.
-    if object_manager_port is not None:
-        (plasma_manager_name, p2,
-         plasma_manager_port) = ray.plasma.start_plasma_manager(
-            plasma_store_name,
-            redis_address,
-            plasma_manager_port=object_manager_port,
-            node_ip_address=node_ip_address,
-            num_retries=1,
-            run_profiler=RUN_PLASMA_MANAGER_PROFILER,
-            stdout_file=manager_stdout_file,
-            stderr_file=manager_stderr_file)
-        assert plasma_manager_port == object_manager_port
+    if not use_raylet:
+        if object_manager_port is not None:
+            (plasma_manager_name, p2,
+             plasma_manager_port) = ray.plasma.start_plasma_manager(
+                plasma_store_name,
+                redis_address,
+                plasma_manager_port=object_manager_port,
+                node_ip_address=node_ip_address,
+                num_retries=1,
+                run_profiler=RUN_PLASMA_MANAGER_PROFILER,
+                stdout_file=manager_stdout_file,
+                stderr_file=manager_stderr_file)
+            assert plasma_manager_port == object_manager_port
+        else:
+            (plasma_manager_name, p2,
+             plasma_manager_port) = ray.plasma.start_plasma_manager(
+                plasma_store_name,
+                redis_address,
+                node_ip_address=node_ip_address,
+                run_profiler=RUN_PLASMA_MANAGER_PROFILER,
+                stdout_file=manager_stdout_file,
+                stderr_file=manager_stderr_file)
    else:
-        (plasma_manager_name, p2,
-         plasma_manager_port) = ray.plasma.start_plasma_manager(
-            plasma_store_name,
-            redis_address,
-            node_ip_address=node_ip_address,
-            run_profiler=RUN_PLASMA_MANAGER_PROFILER,
-            stdout_file=manager_stdout_file,
-            stderr_file=manager_stderr_file)
+        plasma_manager_port = None
+        plasma_manager_name = None
+
    if cleanup:
        all_processes[PROCESS_TYPE_PLASMA_STORE].append(p1)
-        all_processes[PROCESS_TYPE_PLASMA_MANAGER].append(p2)
    record_log_files_in_redis(redis_address, node_ip_address,
-                              [store_stdout_file, store_stderr_file,
-                               manager_stdout_file, manager_stderr_file])
+                              [store_stdout_file, store_stderr_file])
+    if not use_raylet:
+        if cleanup:
+            all_processes[PROCESS_TYPE_PLASMA_MANAGER].append(p2)
+        record_log_files_in_redis(redis_address, node_ip_address,
+                                  [manager_stdout_file, manager_stderr_file])

    return ObjectStoreAddress(plasma_store_name, plasma_manager_name,
                              plasma_manager_port)
@@ -1059,7 +1137,8 @@ def start_ray_processes(address_info=None,
                        resources=None,
                        plasma_directory=None,
                        huge_pages=False,
-                        autoscaling_config=None):
+                        autoscaling_config=None,
+                        use_raylet=False):
    """Helper method to start Ray processes.

    Args:
@@ -1112,6 +1191,8 @@ def start_ray_processes(address_info=None,
        huge_pages: Boolean flag indicating whether to start the Object
            Store with hugetlbfs support. Requires plasma_directory.
        autoscaling_config: path to autoscaling config file.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.

    Returns:
        A dictionary of the address information for the processes that were
@@ -1193,7 +1274,7 @@ def start_ray_processes(address_info=None,
                          cleanup=cleanup)

    # Start the global scheduler, if necessary.
-    if include_global_scheduler:
+    if include_global_scheduler and not use_raylet:
        global_scheduler_stdout_file, global_scheduler_stderr_file = (
            new_log_files("global_scheduler", redirect_output))
        start_global_scheduler(redis_address,
@@ -1235,71 +1316,90 @@ def start_ray_processes(address_info=None,
            manager_stderr_file=plasma_manager_stderr_file,
            objstore_memory=object_store_memory,
            cleanup=cleanup, plasma_directory=plasma_directory,
-            huge_pages=huge_pages)
+            huge_pages=huge_pages,
+            use_raylet=use_raylet)
        object_store_addresses.append(object_store_address)
        time.sleep(0.1)

    # Start any local schedulers that do not yet exist.
-    for i in range(len(local_scheduler_socket_names), num_local_schedulers):
-        # Connect the local scheduler to the object store at the same index.
-        object_store_address = object_store_addresses[i]
-        plasma_address = "{}:{}".format(node_ip_address,
-                                        object_store_address.manager_port)
-        # Determine how many workers this local scheduler should start.
-        if start_workers_from_local_scheduler:
-            num_local_scheduler_workers = workers_per_local_scheduler[i]
-            workers_per_local_scheduler[i] = 0
-        else:
-            # If we're starting the workers from Python, the local scheduler
-            # should not start any workers.
-            num_local_scheduler_workers = 0
-        # Start the local scheduler. Note that if we do not wish to redirect
-        # the worker output, then we cannot redirect the local scheduler
-        # output.
-        local_scheduler_stdout_file, local_scheduler_stderr_file = (
-            new_log_files("local_scheduler_{}".format(i),
-                          redirect_output=redirect_worker_output))
-        local_scheduler_name = start_local_scheduler(
+    if not use_raylet:
+        for i in range(len(local_scheduler_socket_names),
+                       num_local_schedulers):
+            # Connect the local scheduler to the object store at the same
+            # index.
+            object_store_address = object_store_addresses[i]
+            plasma_address = "{}:{}".format(node_ip_address,
+                                            object_store_address.manager_port)
+            # Determine how many workers this local scheduler should start.
+            if start_workers_from_local_scheduler:
+                num_local_scheduler_workers = workers_per_local_scheduler[i]
+                workers_per_local_scheduler[i] = 0
+            else:
+                # If we're starting the workers from Python, the local
+                # scheduler should not start any workers.
+                num_local_scheduler_workers = 0
+            # Start the local scheduler. Note that if we do not wish to
+            # redirect the worker output, then we cannot redirect the local
+            # scheduler output.
+            local_scheduler_stdout_file, local_scheduler_stderr_file = (
+                new_log_files("local_scheduler_{}".format(i),
+                              redirect_output=redirect_worker_output))
+            local_scheduler_name = start_local_scheduler(
+                redis_address,
+                node_ip_address,
+                object_store_address.name,
+                object_store_address.manager_name,
+                worker_path,
+                plasma_address=plasma_address,
+                stdout_file=local_scheduler_stdout_file,
+                stderr_file=local_scheduler_stderr_file,
+                cleanup=cleanup,
+                resources=resources[i],
+                num_workers=num_local_scheduler_workers)
+            local_scheduler_socket_names.append(local_scheduler_name)
+
+        # Make sure that we have exactly num_local_schedulers instances of
+        # object stores and local schedulers.
+        assert len(object_store_addresses) == num_local_schedulers
+        assert len(local_scheduler_socket_names) == num_local_schedulers
+
+    else:
+        # Start the raylet. TODO(rkn): Modify this to allow starting
+        # multiple raylets on the same machine.
+        raylet_stdout_file, raylet_stderr_file = (
+            new_log_files("raylet_{}".format(i),
+                          redirect_output=redirect_output))
+        address_info["raylet_socket_name"] = start_raylet(
            redis_address,
            node_ip_address,
-            object_store_address.name,
-            object_store_address.manager_name,
+            object_store_addresses[i].name,
            worker_path,
-            plasma_address=plasma_address,
-            stdout_file=local_scheduler_stdout_file,
-            stderr_file=local_scheduler_stderr_file,
-            cleanup=cleanup,
-            resources=resources[i],
-            num_workers=num_local_scheduler_workers)
-        local_scheduler_socket_names.append(local_scheduler_name)
-        time.sleep(0.1)
+            stdout_file=None,
+            stderr_file=None,
+            cleanup=cleanup)

-    # Make sure that we have exactly num_local_schedulers instances of object
-    # stores and local schedulers.
-    assert len(object_store_addresses) == num_local_schedulers
-    assert len(local_scheduler_socket_names) == num_local_schedulers
+    if not use_raylet:
+        # Start any workers that the local scheduler has not already started.
+        for i, num_local_scheduler_workers in enumerate(
+                workers_per_local_scheduler):
+            object_store_address = object_store_addresses[i]
+            local_scheduler_name = local_scheduler_socket_names[i]
+            for j in range(num_local_scheduler_workers):
+                worker_stdout_file, worker_stderr_file = new_log_files(
+                    "worker_{}_{}".format(i, j), redirect_output)
+                start_worker(node_ip_address,
+                             object_store_address.name,
+                             object_store_address.manager_name,
+                             local_scheduler_name,
+                             redis_address,
+                             worker_path,
+                             stdout_file=worker_stdout_file,
+                             stderr_file=worker_stderr_file,
+                             cleanup=cleanup)
+                workers_per_local_scheduler[i] -= 1

-    # Start any workers that the local scheduler has not already started.
-    for i, num_local_scheduler_workers in enumerate(
-            workers_per_local_scheduler):
-        object_store_address = object_store_addresses[i]
-        local_scheduler_name = local_scheduler_socket_names[i]
-        for j in range(num_local_scheduler_workers):
-            worker_stdout_file, worker_stderr_file = new_log_files(
-                "worker_{}_{}".format(i, j), redirect_output)
-            start_worker(node_ip_address,
-                         object_store_address.name,
-                         object_store_address.manager_name,
-                         local_scheduler_name,
-                         redis_address,
-                         worker_path,
-                         stdout_file=worker_stdout_file,
-                         stderr_file=worker_stderr_file,
-                         cleanup=cleanup)
-            workers_per_local_scheduler[i] -= 1
-
-    # Make sure that we've started all the workers.
-    assert(sum(workers_per_local_scheduler) == 0)
+        # Make sure that we've started all the workers.
+        assert(sum(workers_per_local_scheduler) == 0)

    # Try to start the web UI.
    if include_webui:
@@ -1327,7 +1427,8 @@ def start_ray_node(node_ip_address,
                   redirect_output=False,
                   resources=None,
                   plasma_directory=None,
-                   huge_pages=False):
+                   huge_pages=False,
+                   use_raylet=False):
    """Start the Ray processes for a single node.

    This assumes that the Ray processes on some master node have already been
@@ -1360,6 +1461,8 @@ def start_ray_node(node_ip_address,
            be created.
        huge_pages: Boolean flag indicating whether to start the Object
            Store with hugetlbfs support. Requires plasma_directory.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.

    Returns:
        A dictionary of the address information for the processes that were
@@ -1400,7 +1503,8 @@ def start_ray_head(address_info=None,
                   include_webui=True,
                   plasma_directory=None,
                   huge_pages=False,
-                   autoscaling_config=None):
+                   autoscaling_config=None,
+                   use_raylet=False):
    """Start Ray in local mode.

    Args:
@@ -1447,6 +1551,8 @@ def start_ray_head(address_info=None,
        huge_pages: Boolean flag indicating whether to start the Object
            Store with hugetlbfs support. Requires plasma_directory.
        autoscaling_config: path to autoscaling config file.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.

    Returns:
        A dictionary of the address information for the processes that were
@@ -1474,7 +1580,8 @@ def start_ray_head(address_info=None,
        redis_max_clients=redis_max_clients,
        plasma_directory=plasma_directory,
        huge_pages=huge_pages,
-        autoscaling_config=autoscaling_config)
+        autoscaling_config=autoscaling_config,
+        use_raylet=use_raylet)


 def try_to_create_directory(directory_path):
@@ -31,6 +31,9 @@ import ray.plasma
 from ray.utils import (FunctionProperties, random_string, binary_to_hex,
                       is_cython)

+# Import flatbuffer bindings.
+from ray.core.generated.ClientTableData import ClientTableData
+
 SCRIPT_MODE = 0
 WORKER_MODE = 1
 PYTHON_MODE = 2
@@ -50,6 +53,7 @@ NIL_LOCAL_SCHEDULER_ID = NIL_ID
 NIL_FUNCTION_ID = NIL_ID
 NIL_ACTOR_ID = NIL_ID
 NIL_ACTOR_HANDLE_ID = NIL_ID
+NIL_CLIENT_ID = 20 * b"\xff"

 # This must be kept in sync with the `error_types` array in
 # common/state/error_table.h.
@@ -452,9 +456,12 @@ class Worker(object):
                            for object_id in object_ids]
        for i in range(0, len(object_ids),
                       ray._config.worker_fetch_request_size()):
-            self.plasma_client.fetch(
-                plain_object_ids[i:(i +
-                                    ray._config.worker_fetch_request_size())])
+            if not self.use_raylet:
+                self.plasma_client.fetch(
+                    plain_object_ids
+                    [i:(i + ray._config.worker_fetch_request_size())])
+            else:
+                print("plasma_client.fetch has not been implemented yet")

        # Get the objects. We initially try to get the objects immediately.
        final_results = self.retrieve_and_deserialize(plain_object_ids, 0)
@@ -478,9 +485,12 @@ class Worker(object):
                plasma.ObjectID, unready_ids.keys()))
            for i in range(0, len(object_ids_to_fetch),
                           ray._config.worker_fetch_request_size()):
-                self.plasma_client.fetch(
-                    object_ids_to_fetch[i:(
-                        i + ray._config.worker_fetch_request_size())])
+                if not self.use_raylet:
+                    self.plasma_client.fetch(
+                        object_ids_to_fetch[i:(
+                            i + ray._config.worker_fetch_request_size())])
+                else:
+                    print("plasma_client.fetch has not been implemented yet")
            results = self.retrieve_and_deserialize(
                object_ids_to_fetch,
                max([ray._config.get_timeout_milliseconds(),
@@ -496,7 +506,7 @@ class Worker(object):

        # If there were objects that we weren't able to get locally, let the
        # local scheduler know that we're now unblocked.
-        if was_blocked:
+        if was_blocked and not self.use_raylet:
            self.local_scheduler_client.notify_unblocked()

        assert len(final_results) == len(object_ids)
@@ -1150,70 +1160,108 @@ def _initialize_serialization(worker=global_worker):
                                   use_dict=True)


-def get_address_info_from_redis_helper(redis_address, node_ip_address):
+def get_address_info_from_redis_helper(redis_address, node_ip_address,
+                                       use_raylet=False):
    redis_ip_address, redis_port = redis_address.split(":")
    # For this command to work, some other client (on the same machine as
    # Redis) must have run "CONFIG SET protected-mode no".
    redis_client = redis.StrictRedis(host=redis_ip_address,
                                     port=int(redis_port))
-    # The client table prefix must be kept in sync with the file
-    # "src/common/redis_module/ray_redis_module.cc" where it is defined.
-    REDIS_CLIENT_TABLE_PREFIX = "CL:"
-    client_keys = redis_client.keys("{}*".format(REDIS_CLIENT_TABLE_PREFIX))
-    # Filter to live clients on the same node and do some basic checking.
-    plasma_managers = []
-    local_schedulers = []
-    for key in client_keys:
-        info = redis_client.hgetall(key)

-        # Ignore clients that were deleted.
-        deleted = info[b"deleted"]
-        deleted = bool(int(deleted))
-        if deleted:
-            continue
+    if not use_raylet:
+        # The client table prefix must be kept in sync with the file
+        # "src/common/redis_module/ray_redis_module.cc" where it is defined.
+        REDIS_CLIENT_TABLE_PREFIX = "CL:"
+        client_keys = redis_client.keys(
+            "{}*".format(REDIS_CLIENT_TABLE_PREFIX))
+        # Filter to live clients on the same node and do some basic checking.
+        plasma_managers = []
+        local_schedulers = []
+        for key in client_keys:
+            info = redis_client.hgetall(key)

-        assert b"ray_client_id" in info
-        assert b"node_ip_address" in info
-        assert b"client_type" in info
-        client_node_ip_address = info[b"node_ip_address"].decode("ascii")
-        if (client_node_ip_address == node_ip_address or
-                (client_node_ip_address == "127.0.0.1" and
-                 redis_ip_address == ray.services.get_node_ip_address())):
-            if info[b"client_type"].decode("ascii") == "plasma_manager":
-                plasma_managers.append(info)
-            elif info[b"client_type"].decode("ascii") == "local_scheduler":
-                local_schedulers.append(info)
-    # Make sure that we got at least one plasma manager and local scheduler.
-    assert len(plasma_managers) >= 1
-    assert len(local_schedulers) >= 1
-    # Build the address information.
-    object_store_addresses = []
-    for manager in plasma_managers:
-        address = manager[b"manager_address"].decode("ascii")
-        port = services.get_port(address)
-        object_store_addresses.append(
-            services.ObjectStoreAddress(
-                name=manager[b"store_socket_name"].decode("ascii"),
-                manager_name=manager[b"manager_socket_name"].decode("ascii"),
-                manager_port=port))
-    scheduler_names = [
-        scheduler[b"local_scheduler_socket_name"].decode("ascii")
-        for scheduler in local_schedulers]
-    client_info = {"node_ip_address": node_ip_address,
-                   "redis_address": redis_address,
-                   "object_store_addresses": object_store_addresses,
-                   "local_scheduler_socket_names": scheduler_names,
-                   # Web UI should be running.
-                   "webui_url": _webui_url_helper(redis_client)}
-    return client_info
+            # Ignore clients that were deleted.
+            deleted = info[b"deleted"]
+            deleted = bool(int(deleted))
+            if deleted:
+                continue
+
+            assert b"ray_client_id" in info
+            assert b"node_ip_address" in info
+            assert b"client_type" in info
+            client_node_ip_address = info[b"node_ip_address"].decode("ascii")
+            if (client_node_ip_address == node_ip_address or
+                    (client_node_ip_address == "127.0.0.1" and
+                     redis_ip_address == ray.services.get_node_ip_address())):
+                if info[b"client_type"].decode("ascii") == "plasma_manager":
+                    plasma_managers.append(info)
+                elif info[b"client_type"].decode("ascii") == "local_scheduler":
+                    local_schedulers.append(info)
+        # Make sure that we got at least one plasma manager and local
+        # scheduler.
+        assert len(plasma_managers) >= 1
+        assert len(local_schedulers) >= 1
+        # Build the address information.
+        object_store_addresses = []
+        for manager in plasma_managers:
+            address = manager[b"manager_address"].decode("ascii")
+            port = services.get_port(address)
+            object_store_addresses.append(
+                services.ObjectStoreAddress(
+                    name=manager[b"store_socket_name"].decode("ascii"),
+                    manager_name=manager[b"manager_socket_name"].decode(
+                        "ascii"),
+                    manager_port=port))
+        scheduler_names = [
+            scheduler[b"local_scheduler_socket_name"].decode("ascii")
+            for scheduler in local_schedulers]
+        client_info = {"node_ip_address": node_ip_address,
+                       "redis_address": redis_address,
+                       "object_store_addresses": object_store_addresses,
+                       "local_scheduler_socket_names": scheduler_names,
+                       # Web UI should be running.
+                       "webui_url": _webui_url_helper(redis_client)}
+        return client_info
+
+    # Handle the raylet case.
+    else:
+        # In the raylet code path, all client data is stored in a zset at the
+        # key for the nil client.
+        client_key = b"CLIENT:" + NIL_CLIENT_ID
+        clients = redis_client.zrange(client_key, 0, -1)
+        raylets = []
+        for client_message in clients:
+            client = ClientTableData.GetRootAsClientTableData(client_message,
+                                                              0)
+            client_node_ip_address = client.NodeManagerAddress().decode(
+                "ascii")
+            if (client_node_ip_address == node_ip_address or
+                    (client_node_ip_address == "127.0.0.1" and
+                     redis_ip_address == ray.services.get_node_ip_address())):
+                raylets.append(client)
+
+        # TODO(rkn): The ObjectStoreSocketName field does not exist.
+        object_store_addresses = [
+            raylet.ObjectStoreSocketName().decode("ascii")
+            for raylet in raylets]
+        raylet_socket_names = [raylet.NodeManagerAddress().decode("ascii") for
+                               raylet in raylets]
+        return {"node_ip_address": node_ip_address,
+                "redis_address": redis_address,
+                "object_store_addresses": object_store_addresses,
+                "raylet_socket_names": raylet_socket_names,
+                # Web UI should be running.
+                "webui_url": _webui_url_helper(redis_client)}


-def get_address_info_from_redis(redis_address, node_ip_address, num_retries=5):
+def get_address_info_from_redis(redis_address, node_ip_address, num_retries=5,
+                                use_raylet=False):
    counter = 0
    while True:
        try:
            return get_address_info_from_redis_helper(redis_address,
-                                                      node_ip_address)
+                                                      node_ip_address,
+                                                      use_raylet=use_raylet)
        except Exception as e:
            if counter == num_retries:
                raise
@@ -1281,7 +1329,8 @@ def _init(address_info=None,
          redis_max_clients=None,
          plasma_directory=None,
          huge_pages=False,
-          include_webui=True):
+          include_webui=True,
+          use_raylet=False):
    """Helper method to connect to an existing Ray cluster or start a new one.

    This method handles two cases. Either a Ray cluster already exists and we
@@ -1336,6 +1385,8 @@ def _init(address_info=None,
            Store with hugetlbfs support. Requires plasma_directory.
        include_webui: Boolean flag indicating whether to start the web
            UI, which is a Jupyter notebook.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.

    Returns:
        Address information about the started processes.
@@ -1402,7 +1453,8 @@ def _init(address_info=None,
            redis_max_clients=redis_max_clients,
            plasma_directory=plasma_directory,
            huge_pages=huge_pages,
-            include_webui=include_webui)
+            include_webui=include_webui,
+            use_raylet=use_raylet)
    else:
        if redis_address is None:
            raise Exception("When connecting to an existing cluster, "
@@ -1439,7 +1491,8 @@ def _init(address_info=None,
            node_ip_address = services.get_node_ip_address(redis_address)
        # Get the address info of the processes to connect to from Redis.
        address_info = get_address_info_from_redis(redis_address,
-                                                   node_ip_address)
+                                                   node_ip_address,
+                                                   use_raylet=use_raylet)

    # Connect this driver to Redis, the object store, and the local scheduler.
    # Choose the first object store and local scheduler if there are multiple.
@@ -1453,13 +1506,17 @@ def _init(address_info=None,
            "redis_address": address_info["redis_address"],
            "store_socket_name": (
                address_info["object_store_addresses"][0].name),
-            "manager_socket_name": (
-                address_info["object_store_addresses"][0].manager_name),
-            "local_scheduler_socket_name": (
-                address_info["local_scheduler_socket_names"][0]),
            "webui_url": address_info["webui_url"]}
+        if not use_raylet:
+            driver_address_info["manager_socket_name"] = (
+                address_info["object_store_addresses"][0].manager_name)
+            driver_address_info["local_scheduler_socket_name"] = (
+                address_info["local_scheduler_socket_names"][0])
+        else:  # Raylet mode: connect to the first raylet if there are many.
+            driver_address_info["raylet_socket_name"] = (
+                address_info["raylet_socket_names"][0])
    connect(driver_address_info, object_id_seed=object_id_seed,
-            mode=driver_mode, worker=global_worker)
+            mode=driver_mode, worker=global_worker, use_raylet=use_raylet)
    return address_info


@@ -1469,7 +1526,8 @@ def init(redis_address=None, node_ip_address=None, object_id_seed=None,
         num_cpus=None, num_gpus=None, resources=None,
         num_custom_resource=None, num_redis_shards=None,
         redis_max_clients=None, plasma_directory=None,
-         huge_pages=False, include_webui=True, object_store_memory=None):
+         huge_pages=False, include_webui=True, object_store_memory=None,
+         use_raylet=False):
    """Connect to an existing Ray cluster or start one and connect to it.

    This method handles two cases. Either a Ray cluster already exists and we
@@ -1513,6 +1571,9 @@ def init(redis_address=None, node_ip_address=None, object_id_seed=None,
            UI, which is a Jupyter notebook.
        object_store_memory: The amount of memory (in bytes) to start the
            object store with.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.
+

    Returns:
        Address information about the started processes.
@@ -1539,7 +1600,8 @@ def init(redis_address=None, node_ip_address=None, object_id_seed=None,
                 plasma_directory=plasma_directory,
                 huge_pages=huge_pages,
                 include_webui=include_webui,
-                 object_store_memory=object_store_memory)
+                 object_store_memory=object_store_memory,
+                 use_raylet=use_raylet)


 def cleanup(worker=global_worker):
@@ -1818,7 +1880,8 @@ def import_thread(worker, mode):
        pass


-def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker):
+def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker,
+            use_raylet=False):
    """Connect this worker to the local scheduler, to Plasma, and to Redis.

    Args:
@@ -1828,6 +1891,8 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker):
            deterministic.
        mode: The mode of the worker. One of SCRIPT_MODE, WORKER_MODE,
            PYTHON_MODE, and SILENT_MODE.
+        use_raylet: True if the new raylet code path should be used. This is
+            not supported yet.
    """
    check_main_thread()
    # Do some basic checking to make sure we didn't call ray.init twice.
@@ -1842,6 +1907,7 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker):
    worker.actor_id = NIL_ACTOR_ID
    worker.connected = True
    worker.set_mode(mode)
+    worker.use_raylet = use_raylet
    # The worker.events field is used to aggregate logging information and
    # display it in the web UI. Note that Python lists protected by the GIL,
    # which is important because we will append to this field from multiple
@@ -1909,8 +1975,9 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker):
            "driver_id": worker.worker_id,
            "start_time": time.time(),
            "plasma_store_socket": info["store_socket_name"],
-            "plasma_manager_socket": info["manager_socket_name"],
-            "local_scheduler_socket": info["local_scheduler_socket_name"]}
+            "plasma_manager_socket": info.get("manager_socket_name"),
+            "local_scheduler_socket": info.get("local_scheduler_socket_name"),
+            "raylet_socket": info.get("raylet_socket_name")}
        driver_info["name"] = (main.__file__ if hasattr(main, "__file__")
                               else "INTERACTIVE MODE")
        worker.redis_client.hmset(b"Drivers:" + worker.worker_id, driver_info)
@@ -1933,11 +2000,22 @@ def connect(info, object_id_seed=None, mode=WORKER_MODE, worker=global_worker):
        raise Exception("This code should be unreachable.")

    # Create an object store client.
-    worker.plasma_client = plasma.connect(info["store_socket_name"],
-                                          info["manager_socket_name"],
-                                          64)
+    if not worker.use_raylet:
+        worker.plasma_client = plasma.connect(info["store_socket_name"],
+                                              info["manager_socket_name"],
+                                              64)
+    else:
+        worker.plasma_client = plasma.connect(info["store_socket_name"],
+                                              "",
+                                              64)
+
+    if not worker.use_raylet:
+        local_scheduler_socket = info["local_scheduler_socket_name"]
+    else:
+        local_scheduler_socket = info["raylet_socket_name"]
+
    worker.local_scheduler_client = ray.local_scheduler.LocalSchedulerClient(
-        info["local_scheduler_socket_name"], worker.worker_id, is_worker)
+        local_scheduler_socket, worker.worker_id, is_worker)

    # If this is a driver, set the current task ID, the task driver ID, and set
    # the task index to 0.
@@ -2275,9 +2353,10 @@ def flush_log(worker=global_worker):
    """Send the logged worker events to the global state store."""
    event_log_key = b"event_log:" + worker.worker_id
    event_log_value = json.dumps(worker.events)
-    worker.local_scheduler_client.log_event(event_log_key,
-                                            event_log_value,
-                                            time.time())
+    if not worker.use_raylet:
+        worker.local_scheduler_client.log_event(event_log_key,
+                                                event_log_value,
+                                                time.time())
    worker.events = []


@@ -2367,6 +2446,9 @@ def wait(object_ids, num_returns=1, timeout=None, worker=global_worker):
        A list of object IDs that are ready and a list of the remaining object
            IDs.
    """
+    if worker.use_raylet:
+        print("plasma_client.wait has not been implemented yet")
+        return

    if isinstance(object_ids, ray.local_scheduler.ObjectID):
        raise TypeError(
@@ -16,10 +16,12 @@ parser.add_argument("--redis-address", required=True, type=str,
                    help="the address to use for Redis")
 parser.add_argument("--object-store-name", required=True, type=str,
                    help="the object store's name")
-parser.add_argument("--object-store-manager-name", required=True, type=str,
+parser.add_argument("--object-store-manager-name", required=False, type=str,
                    help="the object store manager's name")
-parser.add_argument("--local-scheduler-name", required=True, type=str,
+parser.add_argument("--local-scheduler-name", required=False, type=str,
                    help="the local scheduler's name")
+parser.add_argument("--raylet-name", required=False, type=str,
+                    help="the raylet's name")


 if __name__ == "__main__":
@@ -29,9 +31,11 @@ if __name__ == "__main__":
            "redis_address": args.redis_address,
            "store_socket_name": args.object_store_name,
            "manager_socket_name": args.object_store_manager_name,
-            "local_scheduler_socket_name": args.local_scheduler_name}
+            "local_scheduler_socket_name": args.local_scheduler_name,
+            "raylet_socket_name": args.raylet_name}

-    ray.worker.connect(info, mode=ray.WORKER_MODE)
+    ray.worker.connect(info, mode=ray.WORKER_MODE,
+                       use_raylet=(args.raylet_name is not None))

    error_explanation = """
  This error is unexpected and should not have happened. Somehow a worker
@@ -23,6 +23,7 @@ ray_files = [
    "ray/core/src/local_scheduler/local_scheduler",
    "ray/core/src/local_scheduler/liblocal_scheduler_library.so",
    "ray/core/src/global_scheduler/global_scheduler",
+    "ray/core/src/ray/raylet/raylet",
    "ray/WebUI.ipynb"
 ]