Remove num_local_schedulers argument from ray.worker._init. (#3704)

* Remove num_local_schedulers argument from ray.worker._init. * Fix * Fix tests.
2026-06-28 17:34:51 +08:00 · 2019-01-07 12:44:49 -08:00
parent e78562b2e8
commit c9d70f0dda
18 changed files with 388 additions and 513 deletions
@@ -32,12 +32,8 @@ class RayParams(object):
            ignored.
        redis_shard_ports: A list of the ports to use for the non-primary Redis
            shards.
-        num_cpus (int): Number of cpus the user wishes all local schedulers to
-            be configured with.
-        num_gpus (int): Number of gpus the user wishes all local schedulers to
-            be configured with.
-        num_local_schedulers (int): The number of local schedulers to start.
-            This is only provided if start_ray_local is True.
+        num_cpus (int): Number of CPUs to configure the raylet with.
+        num_gpus (int): Number of GPUs to configure the raylet with.
        resources: A dictionary mapping the name of a resource to the quantity
            of that resource available.
        object_store_memory: The amount of memory (in bytes) to start the
@@ -46,12 +42,8 @@ class RayParams(object):
            to use, or None for no limit. Once the limit is exceeded, redis
            will start LRU eviction of entries. This only applies to the
            sharded redis tables (task and object tables).
-        object_manager_ports (list): A list of the ports to use for the object
-            managers. There should be one per object manager being started on
-            this node (typically just one).
-        node_manager_ports (list): A list of the ports to use for the node
-            managers. There should be one per node manager being started on
-            this node (typically just one).
+        object_manager_port (int): The port to use for the object manager.
+        node_manager_port (int): The port to use for the node manager.
        node_ip_address (str): The IP address of the node that we are on.
        object_id_seed (int): Used to seed the deterministic generation of
            object IDs. The same value can be used across multiple runs of the
@@ -97,14 +89,13 @@ class RayParams(object):
                 redis_address=None,
                 num_cpus=None,
                 num_gpus=None,
-                 num_local_schedulers=None,
                 resources=None,
                 object_store_memory=None,
                 redis_max_memory=None,
                 redis_port=None,
                 redis_shard_ports=None,
-                 object_manager_ports=None,
-                 node_manager_ports=None,
+                 object_manager_port=None,
+                 node_manager_port=None,
                 node_ip_address=None,
                 object_id_seed=None,
                 num_workers=None,
@@ -133,14 +124,13 @@ class RayParams(object):
        self.redis_address = redis_address
        self.num_cpus = num_cpus
        self.num_gpus = num_gpus
-        self.num_local_schedulers = num_local_schedulers
        self.resources = resources
        self.object_store_memory = object_store_memory
        self.redis_max_memory = redis_max_memory
        self.redis_port = redis_port
        self.redis_shard_ports = redis_shard_ports
-        self.object_manager_ports = object_manager_ports
-        self.node_manager_ports = node_manager_ports
+        self.object_manager_port = object_manager_port
+        self.node_manager_port = node_manager_port
        self.node_ip_address = node_ip_address
        self.num_workers = num_workers
        self.local_mode = local_mode
@@ -160,6 +150,7 @@ class RayParams(object):
        self.include_log_monitor = include_log_monitor
        self.autoscaling_config = autoscaling_config
        self._internal_config = _internal_config
+        self._check_usage()

    def update(self, **kwargs):
        """Update the settings according to the keyword arguments.
@@ -174,6 +165,8 @@ class RayParams(object):
                raise ValueError("Invalid RayParams parameter in"
                                 " update: %s" % arg)

+        self._check_usage()
+
    def update_if_absent(self, **kwargs):
        """Update the settings when the target fields are None.

@@ -187,3 +180,14 @@ class RayParams(object):
            else:
                raise ValueError("Invalid RayParams parameter in"
                                 " update_if_absent: %s" % arg)
+
+        self._check_usage()
+
+    def _check_usage(self):
+        if self.resources is not None:
+            assert "CPU" not in self.resources, (
+                "'CPU' should not be included in the resource dictionary. Use "
+                "num_cpus instead.")
+            assert "GPU" not in self.resources, (
+                "'GPU' should not be included in the resource dictionary. Use "
+                "num_gpus instead.")
@@ -52,7 +52,7 @@ def create_parser(parser_creator=None):
        type=int,
        help="--num-gpus to use if starting a new cluster.")
    parser.add_argument(
-        "--ray-num-local-schedulers",
+        "--ray-num-nodes",
        default=None,
        type=int,
        help="Emulate multiple cluster nodes for debugging.")
@@ -122,9 +122,9 @@ def run(args, parser):
        if not exp.get("env") and not exp.get("config", {}).get("env"):
            parser.error("the following arguments are required: --env")

-    if args.ray_num_local_schedulers:
+    if args.ray_num_nodes:
        cluster = Cluster()
-        for _ in range(args.ray_num_local_schedulers):
+        for _ in range(args.ray_num_nodes):
            cluster.add_node(
                resources={
                    "num_cpus": args.ray_num_cpus or 1,
@@ -231,21 +231,17 @@ def start(node_ip_address, redis_address, redis_port, num_redis_shards,
                        "    --resources='{\"CustomResource1\": 3, "
                        "\"CustomReseource2\": 2}'")

-    assert "CPU" not in resources, "Use the --num-cpus argument."
-    assert "GPU" not in resources, "Use the --num-gpus argument."
-    if num_cpus is not None:
-        resources["CPU"] = num_cpus
-    if num_gpus is not None:
-        resources["GPU"] = num_gpus
    ray_params = RayParams(
        node_ip_address=node_ip_address,
-        object_manager_ports=[object_manager_port],
-        node_manager_ports=[node_manager_port],
+        object_manager_port=object_manager_port,
+        node_manager_port=node_manager_port,
        num_workers=num_workers,
        object_store_memory=object_store_memory,
        redis_password=redis_password,
        redirect_worker_output=not no_redirect_worker_output,
        redirect_output=not no_redirect_output,
+        num_cpus=num_cpus,
+        num_gpus=num_gpus,
        resources=resources,
        plasma_directory=plasma_directory,
        huge_pages=huge_pages,
@@ -828,10 +828,12 @@ def start_ui(redis_address, stdout_file=None, stderr_file=None, cleanup=True):
        return webui_url


-def check_and_update_resources(resources):
+def check_and_update_resources(num_cpus, num_gpus, resources):
    """Sanity check a resource dictionary and add sensible defaults.

    Args:
+        num_cpus: The number of CPUs.
+        num_gpus: The number of GPUs.
        resources: A dictionary mapping resource names to resource quantities.

    Returns:
@@ -840,6 +842,13 @@ def check_and_update_resources(resources):
    if resources is None:
        resources = {}
    resources = resources.copy()
+    assert "CPU" not in resources
+    assert "GPU" not in resources
+    if num_cpus is not None:
+        resources["CPU"] = num_cpus
+    if num_gpus is not None:
+        resources["GPU"] = num_gpus
+
    if "CPU" not in resources:
        # By default, use the number of hardware execution threads for the
        # number of cores.
@@ -879,10 +888,9 @@ def check_and_update_resources(resources):


 def start_raylet(ray_params,
-                 index,
                 raylet_name,
                 plasma_store_name,
-                 num_workers=0,
+                 num_initial_workers=0,
                 use_valgrind=False,
                 use_profiler=False,
                 stdout_file=None,
@@ -894,15 +902,13 @@ def start_raylet(ray_params,
    Args:
        ray_params (ray.params.RayParams): The RayParams instance. The
            following parameters could be checked: redis_address,
-            node_ip_address, worker_path, resources, object_manager_ports,
-            node_manager_ports, redis_password
-        index (int): Usually, this index is 0. When index > 0, it means
-            starting multiple raylet locally. The index will be used in
-            resources, object_manager_ports, node_manager_ports.
+            node_ip_address, worker_path, resources, num_cpus,
+            num_gpus, object_manager_port, node_manager_port,
+            redis_password.
        raylet_name (str): The name of the raylet socket to create.
        plasma_store_name (str): The name of the plasma store socket to connect
             to.
-        num_workers (int): The number of workers to start.
+        num_initial_workers (int): The number of workers to start initially.
        use_valgrind (bool): True if the raylet should be started inside
            of valgrind. If this is True, use_profiler must be False.
        use_profiler (bool): True if the raylet should be started inside
@@ -926,7 +932,8 @@ def start_raylet(ray_params,
    if use_valgrind and use_profiler:
        raise Exception("Cannot use valgrind and profiler at the same time.")

-    static_resources = check_and_update_resources(ray_params.resources[index])
+    static_resources = check_and_update_resources(
+        ray_params.num_cpus, ray_params.num_gpus, ray_params.resources)

    # Limit the number of workers that can be started in parallel by the
    # raylet. However, make sure it is at least 1.
@@ -956,23 +963,23 @@ def start_raylet(ray_params,

    # If the object manager port is None, then use 0 to cause the object
    # manager to choose its own port.
-    if ray_params.object_manager_ports[index] is None:
-        ray_params.object_manager_ports[index] = 0
+    if ray_params.object_manager_port is None:
+        ray_params.object_manager_port = 0
    # If the node manager port is None, then use 0 to cause the node manager
    # to choose its own port.
-    if ray_params.node_manager_ports[index] is None:
-        ray_params.node_manager_ports[index] = 0
+    if ray_params.node_manager_port is None:
+        ray_params.node_manager_port = 0

    command = [
        RAYLET_EXECUTABLE,
        raylet_name,
        plasma_store_name,
-        str(ray_params.object_manager_ports[index]),
-        str(ray_params.node_manager_ports[index]),
+        str(ray_params.object_manager_port),
+        str(ray_params.node_manager_port),
        ray_params.node_ip_address,
        gcs_ip_address,
        gcs_port,
-        str(num_workers),
+        str(num_initial_workers),
        str(maximum_startup_concurrency),
        resource_argument,
        config_str,
@@ -1289,9 +1296,8 @@ def start_ray_processes(ray_params, cleanup=True):
    Args:
        ray_params (ray.params.RayParams): The RayParams instance. The
            following parameters will be set to default values if it's None:
-            node_ip_address("127.0.0.1"), num_local_schedulers(1),
-            include_webui(False), worker_path(path of default_worker.py),
-            include_log_monitor(False)
+            node_ip_address("127.0.0.1"), include_webui(False),
+            worker_path(path of default_worker.py), include_log_monitor(False)
        cleanup (bool): If cleanup is true, then the processes started here
            will be killed by services.cleanup() when the Python process that
            called this method exits.
@@ -1312,23 +1318,16 @@ def start_ray_processes(ray_params, cleanup=True):
    ray_params.update_if_absent(
        include_log_monitor=False,
        resources={},
-        num_local_schedulers=1,
        include_webui=False,
        node_ip_address="127.0.0.1")
-    if not isinstance(ray_params.resources, list):
-        ray_params.resources = ray_params.num_local_schedulers * [
-            ray_params.resources
-        ]

    if ray_params.num_workers is not None:
        raise Exception("The 'num_workers' argument is deprecated. Please use "
                        "'num_cpus' instead.")
    else:
-        workers_per_local_scheduler = []
-        for resource_dict in ray_params.resources:
-            cpus = resource_dict.get("CPU")
-            workers_per_local_scheduler.append(cpus if cpus is not None else
-                                               multiprocessing.cpu_count())
+        num_initial_workers = (ray_params.num_cpus
+                               if ray_params.num_cpus is not None else
+                               multiprocessing.cpu_count())

    ray_params.update_if_absent(
        address_info={},
@@ -1402,37 +1401,16 @@ def start_ray_processes(ray_params, cleanup=True):
            redis_password=ray_params.redis_password)

    # Initialize with existing services.
-    if "object_store_addresses" not in ray_params.address_info:
-        ray_params.address_info["object_store_addresses"] = []
-    object_store_addresses = ray_params.address_info["object_store_addresses"]
-    if "raylet_socket_names" not in ray_params.address_info:
-        ray_params.address_info["raylet_socket_names"] = []
-    raylet_socket_names = ray_params.address_info["raylet_socket_names"]
+    object_store_address = ray_params.address_info.get("object_store_address")
+    raylet_socket_name = ray_params.address_info.get("raylet_socket_name")

-    # Get the ports to use for the object managers if any are provided.
-    if not isinstance(ray_params.object_manager_ports, list):
-        assert (ray_params.object_manager_ports is None
-                or ray_params.num_local_schedulers == 1)
-        ray_params.object_manager_ports = (ray_params.num_local_schedulers *
-                                           [ray_params.object_manager_ports])
-    assert len(
-        ray_params.object_manager_ports) == ray_params.num_local_schedulers
-    if not isinstance(ray_params.node_manager_ports, list):
-        assert (ray_params.node_manager_ports is None
-                or ray_params.num_local_schedulers == 1)
-        ray_params.node_manager_ports = (
-            ray_params.num_local_schedulers * [ray_params.node_manager_ports])
-    assert len(
-        ray_params.node_manager_ports) == ray_params.num_local_schedulers
-
-    # Start any object stores that do not yet exist.
-    for i in range(ray_params.num_local_schedulers -
-                   len(object_store_addresses)):
+    # Start an object store if it does not yet exist.
+    if object_store_address is None:
        # Start Plasma.
        plasma_store_stdout_file, plasma_store_stderr_file = (
-            new_plasma_store_log_file(i, ray_params.redirect_output))
+            new_plasma_store_log_file(ray_params.redirect_output))

-        object_store_address = start_plasma_store(
+        ray_params.address_info["object_store_address"] = start_plasma_store(
            ray_params.node_ip_address,
            ray_params.redis_address,
            store_stdout_file=plasma_store_stdout_file,
@@ -1443,25 +1421,25 @@ def start_ray_processes(ray_params, cleanup=True):
            huge_pages=ray_params.huge_pages,
            plasma_store_socket_name=ray_params.plasma_store_socket_name,
            redis_password=ray_params.redis_password)
-        object_store_addresses.append(object_store_address)
        time.sleep(0.1)
+    else:
+        raise Exception("JUST CHECKING IF THIS CODE GETS HIT.")

    # Start any raylets that do not exist yet.
-    for raylet_index in range(
-            len(raylet_socket_names), ray_params.num_local_schedulers):
+    if raylet_socket_name is None:
        raylet_stdout_file, raylet_stderr_file = new_raylet_log_file(
-            raylet_index, redirect_output=ray_params.redirect_worker_output)
-        ray_params.address_info["raylet_socket_names"].append(
-            start_raylet(
-                ray_params,
-                raylet_index,
-                ray_params.raylet_socket_name or get_raylet_socket_name(),
-                object_store_addresses[raylet_index],
-                num_workers=workers_per_local_scheduler[raylet_index],
-                stdout_file=raylet_stdout_file,
-                stderr_file=raylet_stderr_file,
-                cleanup=cleanup,
-                config=config))
+            redirect_output=ray_params.redirect_worker_output)
+        ray_params.address_info["raylet_socket_name"] = start_raylet(
+            ray_params,
+            ray_params.raylet_socket_name or get_raylet_socket_name(),
+            ray_params.address_info["object_store_address"],
+            num_initial_workers=num_initial_workers,
+            stdout_file=raylet_stdout_file,
+            stderr_file=raylet_stderr_file,
+            cleanup=cleanup,
+            config=config)
+    else:
+        raise Exception("JUST CHECKING IF THIS CODE GETS HIT.")

    # Try to start the web UI.
    if ray_params.include_webui:
@@ -1486,12 +1464,11 @@ def start_ray_node(ray_params, cleanup=True):
    Args:
        ray_params (ray.params.RayParams): The RayParams instance. The
            following parameters could be checked: node_ip_address,
-            redis_address, object_manager_ports, node_manager_ports,
-            num_workers, num_local_schedulers, object_store_memory,
-            redis_password, worker_path, cleanup, redirect_worker_output,
-            redirect_output, resources, plasma_directory, huge_pages,
-            plasma_store_socket_name, raylet_socket_name, temp_dir,
-            _internal_config
+            redis_address, object_manager_port, node_manager_port,
+            num_workers, object_store_memory, redis_password, worker_path,
+            cleanup, redirect_worker_output, redirect_output, resources,
+            plasma_directory, huge_pages, plasma_store_socket_name,
+            raylet_socket_name, temp_dir, _internal_config.
        cleanup (bool): If cleanup is true, then the processes started here
            will be killed by services.cleanup() when the Python process that
            called this method exits.
@@ -1513,14 +1490,14 @@ def start_ray_head(ray_params, cleanup=True):
    Args:
        ray_params (ray.params.RayParams): The RayParams instance. The
            following parameters could be checked: address_info,
-            object_manager_ports, node_manager_ports, node_ip_address,
-            redis_port, redis_shard_ports, num_workers, num_local_schedulers,
-            object_store_memory, redis_max_memory, worker_path, cleanup,
-            redirect_worker_output, redirect_output,
-            start_workers_from_local_scheduler, resources, num_redis_shards,
-            redis_max_clients, redis_password, include_webui, huge_pages,
-            plasma_directory, autoscaling_config, plasma_store_socket_name,
-            raylet_socket_name, temp_dir, _internal_config
+            object_manager_port, node_manager_port, node_ip_address,
+            redis_port, redis_shard_ports, num_workers, object_store_memory,
+            redis_max_memory, worker_path, cleanup, redirect_worker_output,
+            redirect_output, start_workers_from_local_scheduler, resources,
+            num_redis_shards, redis_max_clients, redis_password, include_webui,
+            huge_pages, plasma_directory, autoscaling_config,
+            plasma_store_socket_name, raylet_socket_name, temp_dir,
+            _internal_config.
        cleanup (bool): If cleanup is true, then the processes started here
            will be killed by services.cleanup() when the Python process that
            called this method exits.
@@ -194,11 +194,10 @@ def new_redis_log_file(redirect_output, shard_number=None):
    return redis_stdout_file, redis_stderr_file


-def new_raylet_log_file(local_scheduler_index, redirect_output):
+def new_raylet_log_file(redirect_output):
    """Create new logging files for raylet."""
    raylet_stdout_file, raylet_stderr_file = new_log_files(
-        "raylet_{}".format(local_scheduler_index),
-        redirect_output=redirect_output)
+        "raylet", redirect_output=redirect_output)
    return raylet_stdout_file, raylet_stderr_file


@@ -223,10 +222,10 @@ def new_log_monitor_log_file():
    return log_monitor_stdout_file, log_monitor_stderr_file


-def new_plasma_store_log_file(local_scheduler_index, redirect_output):
+def new_plasma_store_log_file(redirect_output):
    """Create new logging files for the plasma store."""
    plasma_store_stdout_file, plasma_store_stderr_file = new_log_files(
-        "plasma_store_{}".format(local_scheduler_index), redirect_output)
+        "plasma_store", redirect_output)
    return plasma_store_stdout_file, plasma_store_stderr_file


@@ -63,7 +63,7 @@ class Cluster(object):

        All nodes are by default started with the following settings:
            cleanup=True,
-            resources={"CPU": 1},
+            num_cpus=1,
            object_store_memory=100 * (2**20) # 100 MB

        Args:
@@ -74,9 +74,7 @@ class Cluster(object):
            Node object of the added Ray node.
        """
        node_kwargs = {
-            "resources": {
-                "CPU": 1
-            },
+            "num_cpus": 1,
            "object_store_memory": 100 * (2**20)  # 100 MB
        }
        node_kwargs.update(override_kwargs)
@@ -103,7 +101,7 @@ class Cluster(object):
            node = Node(address_info, process_dict_copy)
            self.worker_nodes[node] = address_info
        logger.info("Starting Node with raylet socket {}".format(
-            address_info["raylet_socket_names"]))
+            address_info["raylet_socket_name"]))

        return node

@@ -125,10 +123,10 @@ class Cluster(object):
        assert not node.any_processes_alive(), (
            "There are zombie processes left over after killing.")

-    def wait_for_nodes(self, retries=30):
+    def wait_for_nodes(self, retries=100):
        """Waits for all nodes to be registered with global state.

-        By default, waits for 3 seconds.
+        By default, waits for 10 seconds.

        Args:
            retries (int): Number of times to retry checking client table.
@@ -239,4 +237,4 @@ class Node(object):
        Assuming one plasma store per raylet, this may be used as a unique
        identifier for a node.
        """
-        return self.address_info['object_store_addresses'][0]
+        return self.address_info['object_store_address']
@@ -30,7 +30,7 @@ def cluster_start():
        initialize_head=True,
        connect=True,
        head_node_args={
-            "resources": dict(CPU=1),
+            "num_cpus": 1,
            "_internal_config": json.dumps({
                "num_heartbeats_timeout": 10
            })
@@ -94,7 +94,7 @@ def test_add_remove_cluster_resources(cluster_start):
    cluster = cluster_start
    assert ray.global_state.cluster_resources()["CPU"] == 1
    nodes = []
-    nodes += [cluster.add_node(resources=dict(CPU=1))]
+    nodes += [cluster.add_node(num_cpus=1)]
    assert cluster.wait_for_nodes()
    assert ray.global_state.cluster_resources()["CPU"] == 2

@@ -103,6 +103,6 @@ def test_add_remove_cluster_resources(cluster_start):
    assert ray.global_state.cluster_resources()["CPU"] == 1

    for i in range(5):
-        nodes += [cluster.add_node(resources=dict(CPU=1))]
+        nodes += [cluster.add_node(num_cpus=1)]
    assert cluster.wait_for_nodes()
    assert ray.global_state.cluster_resources()["CPU"] == 6
@@ -30,7 +30,7 @@ def _start_new_cluster():
        initialize_head=True,
        connect=True,
        head_node_args={
-            "resources": dict(CPU=1),
+            "num_cpus": 1,
            "_internal_config": json.dumps({
                "num_heartbeats_timeout": 10
            })
@@ -58,7 +58,7 @@ def start_connected_emptyhead_cluster():
        initialize_head=True,
        connect=True,
        head_node_args={
-            "resources": dict(CPU=0),
+            "num_cpus": 0,
            "_internal_config": json.dumps({
                "num_heartbeats_timeout": 10
            })
@@ -84,7 +84,7 @@ def test_counting_resources(start_connected_cluster):
        runner.add_trial(t)

    runner.step()  # run 1
-    nodes += [cluster.add_node(resources=dict(CPU=1))]
+    nodes += [cluster.add_node(num_cpus=1)]
    assert cluster.wait_for_nodes()
    assert ray.global_state.cluster_resources()["CPU"] == 2
    cluster.remove_node(nodes.pop())
@@ -94,7 +94,7 @@ def test_counting_resources(start_connected_cluster):
    assert sum(t.status == Trial.RUNNING for t in runner.get_trials()) == 1

    for i in range(5):
-        nodes += [cluster.add_node(resources=dict(CPU=1))]
+        nodes += [cluster.add_node(num_cpus=1)]
    assert cluster.wait_for_nodes()
    assert ray.global_state.cluster_resources()["CPU"] == 6

@@ -105,7 +105,7 @@ def test_counting_resources(start_connected_cluster):
 def test_remove_node_before_result(start_connected_emptyhead_cluster):
    """Tune continues when node is removed before trial returns."""
    cluster = start_connected_emptyhead_cluster
-    node = cluster.add_node(resources=dict(CPU=1))
+    node = cluster.add_node(num_cpus=1)
    assert cluster.wait_for_nodes()

    runner = TrialRunner(BasicVariantGenerator())
@@ -122,7 +122,7 @@ def test_remove_node_before_result(start_connected_emptyhead_cluster):
    runner.step()  # run 1
    assert trial.status == Trial.RUNNING
    cluster.remove_node(node)
-    cluster.add_node(resources=dict(CPU=1))
+    cluster.add_node(num_cpus=1)
    cluster.wait_for_nodes()
    assert ray.global_state.cluster_resources()["CPU"] == 1

@@ -144,7 +144,7 @@ def test_trial_migration(start_connected_emptyhead_cluster):
    The trial state should also be consistent with the checkpoint.
    """
    cluster = start_connected_emptyhead_cluster
-    node = cluster.add_node(resources=dict(CPU=1))
+    node = cluster.add_node(num_cpus=1)
    assert cluster.wait_for_nodes()

    runner = TrialRunner(BasicVariantGenerator())
@@ -162,7 +162,7 @@ def test_trial_migration(start_connected_emptyhead_cluster):
    runner.step()  # start
    runner.step()  # 1 result
    assert t.last_result is not None
-    node2 = cluster.add_node(resources=dict(CPU=1))
+    node2 = cluster.add_node(num_cpus=1)
    cluster.remove_node(node)
    assert cluster.wait_for_nodes()
    runner.step()  # Recovery step
@@ -183,7 +183,7 @@ def test_trial_migration(start_connected_emptyhead_cluster):
    runner.step()  # 1 result
    runner.step()  # 2 result and checkpoint
    assert t2.has_checkpoint()
-    node3 = cluster.add_node(resources=dict(CPU=1))
+    node3 = cluster.add_node(num_cpus=1)
    cluster.remove_node(node2)
    assert cluster.wait_for_nodes()
    runner.step()  # Recovery step
@@ -198,7 +198,7 @@ def test_trial_migration(start_connected_emptyhead_cluster):
    runner.add_trial(t3)
    runner.step()  # start
    runner.step()  # 1 result
-    cluster.add_node(resources=dict(CPU=1))
+    cluster.add_node(num_cpus=1)
    cluster.remove_node(node3)
    assert cluster.wait_for_nodes()
    runner.step()  # Error handling step
@@ -215,7 +215,7 @@ def test_trial_migration(start_connected_emptyhead_cluster):
 def test_trial_requeue(start_connected_emptyhead_cluster):
    """Removing a node in full cluster causes Trial to be requeued."""
    cluster = start_connected_emptyhead_cluster
-    node = cluster.add_node(resources=dict(CPU=1))
+    node = cluster.add_node(num_cpus=1)
    assert cluster.wait_for_nodes()

    runner = TrialRunner(BasicVariantGenerator())
@@ -246,7 +246,7 @@ def test_trial_requeue(start_connected_emptyhead_cluster):
 def test_migration_checkpoint_removal(start_connected_emptyhead_cluster):
    """Test checks that trial restarts if checkpoint is lost w/ node fail."""
    cluster = start_connected_emptyhead_cluster
-    node = cluster.add_node(resources=dict(CPU=1))
+    node = cluster.add_node(num_cpus=1)
    assert cluster.wait_for_nodes()

    runner = TrialRunner(BasicVariantGenerator())
@@ -265,7 +265,7 @@ def test_migration_checkpoint_removal(start_connected_emptyhead_cluster):
    runner.step()  # 1 result
    runner.step()  # 2 result and checkpoint
    assert t1.has_checkpoint()
-    cluster.add_node(resources=dict(CPU=1))
+    cluster.add_node(num_cpus=1)
    cluster.remove_node(node)
    assert cluster.wait_for_nodes()
    shutil.rmtree(os.path.dirname(t1._checkpoint.value))
@@ -280,7 +280,7 @@ def test_migration_checkpoint_removal(start_connected_emptyhead_cluster):
 def test_cluster_down_simple(start_connected_cluster, tmpdir):
    """Tests that TrialRunner save/restore works on cluster shutdown."""
    cluster = start_connected_cluster
-    cluster.add_node(resources=dict(CPU=1))
+    cluster.add_node(num_cpus=1)
    assert cluster.wait_for_nodes()

    dirpath = str(tmpdir)
@@ -1204,17 +1204,14 @@ def get_address_info_from_redis_helper(redis_address,
    if len(raylets) == 0:
        raise Exception(
            "Redis has started but no raylets have registered yet.")
-    object_store_addresses = [
-        ray.utils.decode(raylet.ObjectStoreSocketName()) for raylet in raylets
-    ]
-    raylet_socket_names = [
-        ray.utils.decode(raylet.RayletSocketName()) for raylet in raylets
-    ]
+
+    object_store_address = ray.utils.decode(raylets[0].ObjectStoreSocketName())
+    raylet_socket_name = ray.utils.decode(raylets[0].RayletSocketName())
    return {
        "node_ip_address": node_ip_address,
        "redis_address": redis_address,
-        "object_store_addresses": object_store_addresses,
-        "raylet_socket_names": raylet_socket_names,
+        "object_store_address": object_store_address,
+        "raylet_socket_name": raylet_socket_name,
        # Web UI should be running.
        "webui_url": _webui_url_helper(redis_client)
    }
@@ -1242,44 +1239,6 @@ def get_address_info_from_redis(redis_address,
        counter += 1


-def _normalize_resource_arguments(num_cpus, num_gpus, resources,
-                                  num_local_schedulers):
-    """Stick the CPU and GPU arguments into the resources dictionary.
-
-    This also checks that the arguments are well-formed.
-
-    Args:
-        num_cpus: Either a number of CPUs or a list of numbers of CPUs.
-        num_gpus: Either a number of CPUs or a list of numbers of CPUs.
-        resources: Either a dictionary of resource mappings or a list of
-            dictionaries of resource mappings.
-        num_local_schedulers: The number of local schedulers.
-
-    Returns:
-        A list of dictionaries of resources of length num_local_schedulers.
-    """
-    if resources is None:
-        resources = {}
-    if not isinstance(num_cpus, list):
-        num_cpus = num_local_schedulers * [num_cpus]
-    if not isinstance(num_gpus, list):
-        num_gpus = num_local_schedulers * [num_gpus]
-    if not isinstance(resources, list):
-        resources = num_local_schedulers * [resources]
-
-    new_resources = [r.copy() for r in resources]
-
-    for i in range(num_local_schedulers):
-        assert "CPU" not in new_resources[i], "Use the 'num_cpus' argument."
-        assert "GPU" not in new_resources[i], "Use the 'num_gpus' argument."
-        if num_cpus[i] is not None:
-            new_resources[i]["CPU"] = num_cpus[i]
-        if num_gpus[i] is not None:
-            new_resources[i]["GPU"] = num_gpus[i]
-
-    return new_resources
-
-
 def _init(ray_params, driver_id=None):
    """Helper method to connect to an existing Ray cluster or start a new one.

@@ -1291,8 +1250,8 @@ def _init(ray_params, driver_id=None):
        ray_params (ray.params.RayParams): The RayParams instance. The
            following parameters could be checked: address_info,
            start_ray_local, object_id_seed, num_workers,
-            num_local_schedulers, object_store_memory, redis_max_memory,
-            local_mode, redirect_worker_output, driver_mode, redirect_output,
+            object_store_memory, redis_max_memory, local_mode,
+            redirect_worker_output, driver_mode, redirect_output,
            start_workers_from_local_scheduler, num_cpus, num_gpus, resources,
            num_redis_shards, redis_max_clients, redis_password,
            plasma_directory, huge_pages, include_webui, driver_id,
@@ -1333,18 +1292,9 @@ def _init(ray_params, driver_id=None):
        # are already registered in address_info.
        ray_params.update_if_absent(
            node_ip_address=ray.services.get_node_ip_address())
-        # Use 1 local scheduler if num_local_schedulers is not provided. If
-        # existing local schedulers are provided, use that count as
-        # num_local_schedulers.
-        ray_params.update_if_absent(num_local_schedulers=1)
        # Use 1 additional redis shard if num_redis_shards is not provided.
        ray_params.update_if_absent(num_redis_shards=1)

-        # Stick the CPU and GPU resources into the resource dictionary.
-        ray_params.resources = _normalize_resource_arguments(
-            ray_params.num_cpus, ray_params.num_gpus, ray_params.resources,
-            ray_params.num_local_schedulers)
-
        # Start the scheduler, object store, and some workers. These will be
        # killed by the call to shutdown(), which happens when the Python
        # script exits.
@@ -1356,9 +1306,6 @@ def _init(ray_params, driver_id=None):
        if ray_params.num_workers is not None:
            raise Exception("When connecting to an existing cluster, "
                            "num_workers must not be provided.")
-        if ray_params.num_local_schedulers is not None:
-            raise Exception("When connecting to an existing cluster, "
-                            "num_local_schedulers must not be provided.")
        if ray_params.num_cpus is not None or ray_params.num_gpus is not None:
            raise Exception("When connecting to an existing cluster, num_cpus "
                            "and num_gpus must not be provided.")
@@ -1417,11 +1364,11 @@ def _init(ray_params, driver_id=None):
            "node_ip_address": ray_params.node_ip_address,
            "redis_address": ray_params.address_info["redis_address"],
            "store_socket_name": ray_params.address_info[
-                "object_store_addresses"][0],
+                "object_store_address"],
            "webui_url": ray_params.address_info["webui_url"],
        }
        driver_address_info["raylet_socket_name"] = (
-            ray_params.address_info["raylet_socket_names"][0])
+            ray_params.address_info["raylet_socket_name"])

    # We only pass `temp_dir` to a worker (WORKER_MODE).
    # It can't be a worker here.