diff --git a/ci/long_running_tests/workloads/actor_deaths.py b/ci/long_running_tests/workloads/actor_deaths.py index 55f05c34c..993a495f3 100644 --- a/ci/long_running_tests/workloads/actor_deaths.py +++ b/ci/long_running_tests/workloads/actor_deaths.py @@ -33,7 +33,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/apex.py b/ci/long_running_tests/workloads/apex.py index 2b177ab6f..3b99dce6b 100644 --- a/ci/long_running_tests/workloads/apex.py +++ b/ci/long_running_tests/workloads/apex.py @@ -30,7 +30,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/impala.py b/ci/long_running_tests/workloads/impala.py index 7e9eabc0f..9fc9dadf6 100644 --- a/ci/long_running_tests/workloads/impala.py +++ b/ci/long_running_tests/workloads/impala.py @@ -30,7 +30,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/many_actor_tasks.py b/ci/long_running_tests/workloads/many_actor_tasks.py index 2dd395c0e..e1a5de9ed 100644 --- a/ci/long_running_tests/workloads/many_actor_tasks.py +++ b/ci/long_running_tests/workloads/many_actor_tasks.py @@ -31,7 +31,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/many_drivers.py b/ci/long_running_tests/workloads/many_drivers.py index b3fb4d1ee..f9eae2f8d 100644 --- a/ci/long_running_tests/workloads/many_drivers.py +++ b/ci/long_running_tests/workloads/many_drivers.py @@ -32,7 +32,7 @@ for i in range(num_nodes): resources={str(i): 5}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. @@ -41,7 +41,7 @@ ray.init(redis_address=cluster.redis_address) driver_script = """ import ray -ray.init(redis_address="{}") +ray.init(address="{}") num_nodes = {} @@ -65,7 +65,7 @@ for _ in range(5): assert ray.get(actor.method.remote()) == 1 print("success") -""".format(cluster.redis_address, num_nodes) +""".format(cluster.address, num_nodes) @ray.remote diff --git a/ci/long_running_tests/workloads/many_tasks.py b/ci/long_running_tests/workloads/many_tasks.py index 852e6a1bf..6aca51b49 100644 --- a/ci/long_running_tests/workloads/many_tasks.py +++ b/ci/long_running_tests/workloads/many_tasks.py @@ -31,7 +31,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/node_failures.py b/ci/long_running_tests/workloads/node_failures.py index 7a50e2dd7..693c47bac 100644 --- a/ci/long_running_tests/workloads/node_failures.py +++ b/ci/long_running_tests/workloads/node_failures.py @@ -31,7 +31,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/long_running_tests/workloads/pbt.py b/ci/long_running_tests/workloads/pbt.py index 5e63596c4..936acab56 100644 --- a/ci/long_running_tests/workloads/pbt.py +++ b/ci/long_running_tests/workloads/pbt.py @@ -31,7 +31,7 @@ for i in range(num_nodes): resources={str(i): 2}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) -ray.init(redis_address=cluster.redis_address) +ray.init(address=cluster.address) # Run the workload. diff --git a/ci/performance_tests/test_performance.py b/ci/performance_tests/test_performance.py index 9c70fa08c..19a593583 100644 --- a/ci/performance_tests/test_performance.py +++ b/ci/performance_tests/test_performance.py @@ -26,7 +26,7 @@ parser.add_argument( help="True if the object store should not be warmed up. This could cause " "the benchmarks to appear slower than usual.") parser.add_argument( - "--redis-address", + "--address", required=False, type=str, help="The address of the cluster to connect to. If this is ommitted, then " @@ -57,7 +57,7 @@ def start_local_cluster(num_nodes, object_store_memory): resources={str(i): 500}, object_store_memory=object_store_memory, redis_max_memory=redis_max_memory) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) return cluster @@ -228,12 +228,12 @@ if __name__ == "__main__": if num_nodes < 2: raise ValueError("The --num-nodes argument must be at least 2.") - if args.redis_address: - ray.init(redis_address=args.redis_address) + if args.address: + ray.init(address=args.address) wait_for_and_check_cluster_configuration(num_nodes) logger.warning( "Running performance benchmarks on the cluster with " - "address %s.", args.redis_address) + "address %s.", args.address) else: logger.warning( "Running performance benchmarks on a simulated cluster " diff --git a/ci/stress_tests/test_dead_actors.py b/ci/stress_tests/test_dead_actors.py index a11c006de..9db90f926 100644 --- a/ci/stress_tests/test_dead_actors.py +++ b/ci/stress_tests/test_dead_actors.py @@ -12,7 +12,7 @@ import ray logger = logging.getLogger(__name__) -ray.init(redis_address="localhost:6379") +ray.init(address="localhost:6379") @ray.remote diff --git a/ci/stress_tests/test_many_tasks.py b/ci/stress_tests/test_many_tasks.py index 985e05c28..99c1de02e 100644 --- a/ci/stress_tests/test_many_tasks.py +++ b/ci/stress_tests/test_many_tasks.py @@ -13,7 +13,7 @@ import ray logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) -ray.init(redis_address="localhost:6379") +ray.init(address="localhost:6379") # These numbers need to correspond with the autoscaler config file. # The number of remote nodes in the autoscaler should upper bound diff --git a/ci/travis/install-dependencies.sh b/ci/travis/install-dependencies.sh index 8864c2fae..8a70b4621 100755 --- a/ci/travis/install-dependencies.sh +++ b/ci/travis/install-dependencies.sh @@ -25,7 +25,7 @@ if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" pip install -q scipy tensorflow cython==0.29.0 gym opencv-python-headless pyyaml pandas==0.24.2 requests \ - feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx tabulate psutil + feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout pytest-sugar mock flaky networkx tabulate psutil elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then sudo apt-get update sudo apt-get install -y python-dev python-numpy build-essential curl unzip tmux gdb @@ -34,21 +34,21 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" pip install -q scipy tensorflow cython==0.29.0 gym opencv-python-headless pyyaml pandas==0.24.2 requests \ - feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx tabulate psutil aiohttp + feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout pytest-sugar flaky networkx tabulate psutil aiohttp elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then # Install miniconda. wget https://repo.continuum.io/miniconda/Miniconda2-4.5.4-MacOSX-x86_64.sh -O miniconda.sh -nv bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" pip install -q cython==0.29.0 tensorflow gym opencv-python-headless pyyaml pandas==0.24.2 requests \ - feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout mock flaky networkx tabulate psutil + feather-format lxml openpyxl xlrd py-spy setproctitle faulthandler pytest-timeout pytest-sugar mock flaky networkx tabulate psutil elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then # Install miniconda. wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-MacOSX-x86_64.sh -O miniconda.sh -nv bash miniconda.sh -b -p $HOME/miniconda export PATH="$HOME/miniconda/bin:$PATH" pip install -q cython==0.29.0 tensorflow gym opencv-python-headless pyyaml pandas==0.24.2 requests \ - feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout flaky networkx tabulate psutil aiohttp + feather-format lxml openpyxl xlrd py-spy setproctitle pytest-timeout pytest-sugar flaky networkx tabulate psutil aiohttp elif [[ "$LINT" == "1" ]]; then sudo apt-get update sudo apt-get install -y build-essential curl unzip diff --git a/doc/source/autoscaling.rst b/doc/source/autoscaling.rst index 30cbe6d48..56e197460 100644 --- a/doc/source/autoscaling.rst +++ b/doc/source/autoscaling.rst @@ -14,7 +14,7 @@ as described in `the boto docs `__ cluster config file will create a small cluster with a m5.large head node (on-demand) configured to autoscale up to two m5.large `spot workers `__. Try it out by running these commands from your personal computer. Once the cluster is started, you can then -SSH into the head node, ``source activate tensorflow_p36``, and then run Ray programs with ``ray.init(redis_address="localhost:6379")``. +SSH into the head node, ``source activate tensorflow_p36``, and then run Ray programs with ``ray.init(address="localhost:6379")``. .. code-block:: bash @@ -37,7 +37,7 @@ First, install the Google API client (``pip install google-api-python-client``), Then you're ready to go. The provided `ray/python/ray/autoscaler/gcp/example-full.yaml `__ cluster config file will create a small cluster with a n1-standard-2 head node (on-demand) configured to autoscale up to two n1-standard-2 `preemptible workers `__. Note that you'll need to fill in your project id in those templates. Try it out by running these commands from your personal computer. Once the cluster is started, you can then -SSH into the head node and then run Ray programs with ``ray.init(redis_address="localhost:6379")``. +SSH into the head node and then run Ray programs with ``ray.init(address="localhost:6379")``. .. code-block:: bash @@ -59,7 +59,7 @@ This is used when you have a list of machine IP addresses to connect in a Ray cl Be sure to specify the proper ``head_ip``, list of ``worker_ips``, and the ``ssh_user`` field. Try it out by running these commands from your personal computer. Once the cluster is started, you can then -SSH into the head node and then run Ray programs with ``ray.init(redis_address="localhost:6379")``. +SSH into the head node and then run Ray programs with ``ray.init(address="localhost:6379")``. .. code-block:: bash @@ -77,7 +77,7 @@ SSH into the head node and then run Ray programs with ``ray.init(redis_address=" Running commands on new and existing clusters --------------------------------------------- -You can use ``ray exec`` to conveniently run commands on clusters. Note that scripts you run should connect to Ray via ``ray.init(redis_address="localhost:6379")``. +You can use ``ray exec`` to conveniently run commands on clusters. Note that scripts you run should connect to Ray via ``ray.init(address="localhost:6379")``. .. code-block:: bash diff --git a/doc/source/configure.rst b/doc/source/configure.rst index d3a3337f2..a5c4b4c41 100644 --- a/doc/source/configure.rst +++ b/doc/source/configure.rst @@ -36,7 +36,7 @@ When starting Ray from the command line, pass the ``--num-cpus`` and ``--num-cpu $ ray start --head --num-cpus= --num-gpus= # To start a non-head node. - $ ray start --redis-address= --num-cpus= --num-gpus= + $ ray start --address=
--num-cpus= --num-gpus= # Specifying custom resources ray start [--head] --num-cpus= --resources='{"Resource1": 4, "Resource2": 16}' @@ -46,7 +46,7 @@ If using the command line, connect to the Ray cluster as follow: .. code-block:: python # Connect to ray. Notice if connected to existing cluster, you don't specify resources. - ray.init(redis_address=) + ray.init(address=
) Logging and Debugging diff --git a/doc/source/deploy-on-kubernetes.rst b/doc/source/deploy-on-kubernetes.rst index 6c5713509..78e664b65 100644 --- a/doc/source/deploy-on-kubernetes.rst +++ b/doc/source/deploy-on-kubernetes.rst @@ -57,7 +57,7 @@ Start an IPython interpreter, e.g., ``ipython`` # Note that if you run this script on a non-head node, then you must replace # "localhost" with socket.gethostbyname("ray-head"). - ray.init(redis_address="localhost:6379") + ray.init(address="localhost:6379") @ray.remote def f(x): diff --git a/doc/source/deploying-on-slurm.rst b/doc/source/deploying-on-slurm.rst index ab2c78067..3ff4dbd2b 100644 --- a/doc/source/deploying-on-slurm.rst +++ b/doc/source/deploying-on-slurm.rst @@ -23,7 +23,7 @@ Clusters managed by Slurm may require that Ray is initialized as a part of the s node1=${nodes_array[0]} - ip_prefix=$(srun --nodes=1 --ntasks=1 -w $node1 hostname --ip-address) # Making redis-address + ip_prefix=$(srun --nodes=1 --ntasks=1 -w $node1 hostname --ip-address) # Making address suffix=':6379' ip_head=$ip_prefix$suffix @@ -35,7 +35,7 @@ Clusters managed by Slurm may require that Ray is initialized as a part of the s for (( i=1; i<=$worker_num; i++ )) do node2=${nodes_array[$i]} - srun --nodes=1 --ntasks=1 -w $node2 ray start --block --redis-address=$ip_head & # Starting the workers + srun --nodes=1 --ntasks=1 -w $node2 ray start --block --address=$ip_head & # Starting the workers sleep 5 done @@ -49,7 +49,7 @@ Clusters managed by Slurm may require that Ray is initialized as a part of the s import time import ray - ray.init(redis_address=os.environ["ip_head"]) + ray.init(address=os.environ["ip_head"]) @ray.remote def f(): diff --git a/doc/source/example-resnet.rst b/doc/source/example-resnet.rst index 4615b26a1..f0fbd2e7d 100644 --- a/doc/source/example-resnet.rst +++ b/doc/source/example-resnet.rst @@ -40,8 +40,8 @@ Then run the training script that matches the dataset you downloaded. --num_gpus=1 To run the training script on a cluster with multiple machines, you will need -to also pass in the flag ``--redis-address=``, where -```` is the address of the Redis server on the head node. +to also pass in the flag ``--address=
``, where +``
`` is the address of the Redis server on the head node. The script will print out the IP address that the log files are stored on. In the single-node case, you can ignore this and run tensorboard on the current diff --git a/doc/source/example-rl-pong.rst b/doc/source/example-rl-pong.rst index c6309bb90..dee2ddd85 100644 --- a/doc/source/example-rl-pong.rst +++ b/doc/source/example-rl-pong.rst @@ -20,7 +20,7 @@ Then you can run the example as follows. python ray/doc/examples/rl_pong/driver.py --batch-size=10 To run the example on a cluster, simply pass in the flag -``--redis-address=``. +``--address=
``. At the moment, on a large machine with 64 physical cores, computing an update with a batch of size 1 takes about 1 second, a batch of size 10 takes about 2.5 diff --git a/doc/source/internals-overview.rst b/doc/source/internals-overview.rst index 8f2111b0f..151588bad 100644 --- a/doc/source/internals-overview.rst +++ b/doc/source/internals-overview.rst @@ -25,7 +25,7 @@ Connecting to an existing Ray cluster ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To connect to an existing Ray cluster, simply pass the argument address of the -Redis server as the ``redis_address=`` keyword argument into ``ray.init``. In +Redis server as the ``address=`` keyword argument into ``ray.init``. In this case, no new processes will be started when ``ray.init`` is called, and similarly the processes will continue running when the script exits. In this case, all processes except workers that correspond to actors are shared between diff --git a/doc/source/tune-distributed.rst b/doc/source/tune-distributed.rst index 82ff76b9c..1ea97dcad 100644 --- a/doc/source/tune-distributed.rst +++ b/doc/source/tune-distributed.rst @@ -8,7 +8,7 @@ Tune is commonly used for large-scale distributed hyperparameter optimization. T **Quick Summary**: To run a distributed experiment with Tune, you need to: - 1. Make sure your script has ``ray.init(redis_address=...)`` to connect to the existing Ray cluster. + 1. Make sure your script has ``ray.init(address=...)`` to connect to the existing Ray cluster. 2. If a ray cluster does not exist, start a Ray cluster (instructions for `local machines `_, `cloud `_). 3. Run the script on the head node (or use ``ray submit``). @@ -19,7 +19,7 @@ Running a distributed (multi-node) experiment requires Ray to be started already Across your machines, Tune will automatically detect the number of GPUs and CPUs without you needing to manage ``CUDA_VISIBLE_DEVICES``. -To execute a distributed experiment, call ``ray.init(redis_address=XXX)`` before ``tune.run``, where ``XXX`` is the Ray redis address, which defaults to ``localhost:6379``. The Tune python script should be executed only on the head node of the Ray cluster. +To execute a distributed experiment, call ``ray.init(address=XXX)`` before ``tune.run``, where ``XXX`` is the Ray redis address, which defaults to ``localhost:6379``. The Tune python script should be executed only on the head node of the Ray cluster. One common approach to modifying an existing Tune experiment to go distributed is to set an ``argparse`` variable so that toggling between distributed and single-node is seamless. @@ -29,22 +29,22 @@ One common approach to modifying an existing Tune experiment to go distributed i import argparse parser = argparse.ArgumentParser() - parser.add_argument("--ray-redis-address") + parser.add_argument("--ray-address") args = parser.parse_args() - ray.init(redis_address=args.ray_redis_address) + ray.init(address=args.ray_address) tune.run(...) .. code-block:: bash # On the head node, connect to an existing ray cluster - $ python tune_script.py --ray-redis-address=localhost:XXXX + $ python tune_script.py --ray-address=localhost:XXXX If you used a cluster configuration (starting a cluster with ``ray up`` or ``ray submit --start``), use: .. code-block:: bash - ray submit tune-default.yaml tune_script.py --args="--ray-redis-address=localhost:6379" + ray submit tune-default.yaml tune_script.py --args="--ray-address=localhost:6379" .. tip:: @@ -69,7 +69,7 @@ If you have already have a list of nodes, you can follow the local private clust .. code-block:: bash - ray submit tune-default.yaml tune_script.py --args="--ray-redis-address=localhost:6379" + ray submit tune-default.yaml tune_script.py --args="--ray-address=localhost:6379" Manual Local Cluster Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -85,18 +85,18 @@ If you run into issues using the local cluster setup (or want to add nodes manua The command will print out the address of the Redis server that was started (and some other address information). -**Then on all of the other nodes**, run the following. Make sure to replace ```` with the value printed by the command on the head node (it should look something like ``123.45.67.89:6379``). +**Then on all of the other nodes**, run the following. Make sure to replace ``
`` with the value printed by the command on the head node (it should look something like ``123.45.67.89:6379``). .. code-block:: bash - $ ray start --redis-address= + $ ray start --address=
Then, you can run your Tune Python script on the head node like: .. code-block:: bash # On the head node, execute using existing ray cluster - $ python tune_script.py --ray-redis-address= + $ python tune_script.py --ray-address=
Launching a cloud cluster ------------------------- @@ -121,7 +121,7 @@ Ray currently supports AWS and GCP. Below, we will launch nodes on AWS that will .. code-block:: bash - ray submit tune-default.yaml tune_script.py --start --args="--ray-redis-address=localhost:6379" + ray submit tune-default.yaml tune_script.py --start --args="--ray-address=localhost:6379" .. image:: images/tune-upload.png :scale: 50% @@ -208,7 +208,7 @@ Here is an example for running Tune on spot instances. This assumes your AWS cre .. code-block:: bash ray submit tune-default.yaml mnist_pytorch_trainable.py \ - --args="--ray-redis-address=localhost:6379" \ + --args="--ray-address=localhost:6379" \ --start 4. Optionally for testing on AWS or GCP, you can use the following to kill a random worker node after all the worker nodes are up @@ -223,7 +223,7 @@ To summarize, here are the commands to run: wget https://raw.githubusercontent.com/ray-project/ray/master/python/ray/tune/examples/mnist_pytorch_trainable.py wget https://raw.githubusercontent.com/ray-project/ray/master/python/ray/tune/tune-default.yaml - ray submit tune-default.yaml mnist_pytorch_trainable.py --args="--ray-redis-address=localhost:6379" --start + ray submit tune-default.yaml mnist_pytorch_trainable.py --args="--ray-address=localhost:6379" --start # wait a while until after all nodes have started ray kill-random-node tune-default.yaml --hard @@ -240,13 +240,13 @@ Below are some commonly used commands for submitting experiments. Please see the .. code-block:: bash # Upload `tune_experiment.py` from your local machine onto the cluster. Then, - # run `python tune_experiment.py --redis-address=localhost:6379` on the remote machine. - $ ray submit CLUSTER.YAML tune_experiment.py --args="--redis-address=localhost:6379" + # run `python tune_experiment.py --address=localhost:6379` on the remote machine. + $ ray submit CLUSTER.YAML tune_experiment.py --args="--address=localhost:6379" # Start a cluster and run an experiment in a detached tmux session, # and shut down the cluster as soon as the experiment completes. # In `tune_experiment.py`, set `tune.run(upload_dir="s3://...")` to persist results - $ ray submit CLUSTER.YAML --tmux --start --stop tune_experiment.py --args="--redis-address=localhost:6379" + $ ray submit CLUSTER.YAML --tmux --start --stop tune_experiment.py --args="--address=localhost:6379" # To start or update your cluster: $ ray up CLUSTER.YAML [-y] diff --git a/doc/source/tune.rst b/doc/source/tune.rst index 8f3e8303a..b83247eb3 100644 --- a/doc/source/tune.rst +++ b/doc/source/tune.rst @@ -57,9 +57,9 @@ Distributed Quick Start import argparse parser = argparse.ArgumentParser() - parser.add_argument("--ray-redis-address") + parser.add_argument("--ray-address") args = parser.parse_args() - ray.init(redis_address=args.ray_redis_address) + ray.init(address=args.ray_address) Alternatively, download a full example script here: :download:`mnist_pytorch.py <../../python/ray/tune/examples/mnist_pytorch.py>` @@ -74,7 +74,7 @@ Alternatively, download it here: :download:`tune-local-default.yaml <../../pytho .. code-block:: bash - ray submit tune-local-default.yaml mnist_pytorch.py --args="--ray-redis-address=localhost:6379" --start + ray submit tune-local-default.yaml mnist_pytorch.py --args="--ray-address=localhost:6379" --start This will start Ray on all of your machines and run a distributed hyperparameter search across them. @@ -84,7 +84,7 @@ To summarize, here are the full set of commands: wget https://raw.githubusercontent.com/ray-project/ray/master/python/ray/tune/examples/mnist_pytorch.py wget https://raw.githubusercontent.com/ray-project/ray/master/python/ray/tune/tune-local-default.yaml - ray submit tune-local-default.yaml mnist_pytorch.py --args="--ray-redis-address=localhost:6379" --start + ray submit tune-local-default.yaml mnist_pytorch.py --args="--ray-address=localhost:6379" --start Take a look at the `Distributed Experiments `_ documentation for more details, including: diff --git a/doc/source/using-ray-on-a-cluster.rst b/doc/source/using-ray-on-a-cluster.rst index 8b03af337..d0c5a94c2 100644 --- a/doc/source/using-ray-on-a-cluster.rst +++ b/doc/source/using-ray-on-a-cluster.rst @@ -33,12 +33,12 @@ The command will print out the address of the Redis server that was started (and some other address information). **Then on all of the other nodes**, run the following. Make sure to replace -```` with the value printed by the command on the head node (it +``
`` with the value printed by the command on the head node (it should look something like ``123.45.67.89:6379``). .. code-block:: bash - ray start --redis-address= + ray start --address=
If you wish to specify that a machine has 10 CPUs and 1 GPU, you can do this with the flags ``--num-cpus=10`` and ``--num-gpus=1``. See the `Configuration `__ page for more information. @@ -56,7 +56,7 @@ the following. .. code-block:: python import ray - ray.init(redis_address="") + ray.init(address="
") Now you can define remote functions and execute tasks. For example, to verify that the correct number of nodes have joined the cluster, you can run the diff --git a/doc/source/walkthrough.rst b/doc/source/walkthrough.rst index a41f73174..0be6d9715 100644 --- a/doc/source/walkthrough.rst +++ b/doc/source/walkthrough.rst @@ -15,7 +15,7 @@ large cluster. To run this walkthrough, install Ray with ``pip install -U ray``. import ray # Start Ray. If you're connecting to an existing cluster, you would use - # ray.init(redis_address=) instead. + # ray.init(address=) instead. ray.init() See the `Configuration `__ documentation for the various ways to diff --git a/python/ray/node.py b/python/ray/node.py index d7edaa21a..d22819b22 100644 --- a/python/ray/node.py +++ b/python/ray/node.py @@ -201,6 +201,11 @@ class Node(object): """Get the cluster Redis address.""" return self._node_ip_address + @property + def address(self): + """Get the cluster address.""" + return self._redis_address + @property def redis_address(self): """Get the cluster Redis address.""" diff --git a/python/ray/tests/cluster_utils.py b/python/ray/tests/cluster_utils.py index 294872955..e92b1642f 100644 --- a/python/ray/tests/cluster_utils.py +++ b/python/ray/tests/cluster_utils.py @@ -45,13 +45,17 @@ class Cluster(object): if connect: self.connect() + @property + def address(self): + return self.redis_address + def connect(self): """Connect the driver to the cluster.""" assert self.redis_address is not None assert not self.connected output_info = ray.init( ignore_reinit_error=True, - redis_address=self.redis_address, + address=self.redis_address, redis_password=self.redis_password) logger.info(output_info) self.connected = True @@ -167,7 +171,7 @@ class Cluster(object): Exception: An exception is raised if we time out while waiting for nodes to join. """ - ip_address, port = self.redis_address.split(":") + ip_address, port = self.address.split(":") redis_client = redis.StrictRedis( host=ip_address, port=int(port), password=self.redis_password) diff --git a/python/ray/tests/conftest.py b/python/ray/tests/conftest.py index d87323893..b6062f2af 100644 --- a/python/ray/tests/conftest.py +++ b/python/ray/tests/conftest.py @@ -105,7 +105,7 @@ def _ray_start_cluster(**kwargs): for _ in range(num_nodes): remote_nodes.append(cluster.add_node(**init_kwargs)) if do_init: - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) yield cluster # The code after the yield will run as teardown code. ray.shutdown() @@ -158,12 +158,12 @@ def call_ray_start(request): subprocess.check_output(command_args, stderr=subprocess.STDOUT)) # Get the redis address from the output. redis_substring_prefix = "redis_address=\"" - redis_address_location = ( + address_location = ( out.find(redis_substring_prefix) + len(redis_substring_prefix)) - redis_address = out[redis_address_location:] - redis_address = redis_address.split("\"")[0] + address = out[address_location:] + address = address.split("\"")[0] - yield redis_address + yield address # Disconnect from the Ray cluster. ray.shutdown() @@ -182,7 +182,7 @@ def two_node_cluster(): for _ in range(2): remote_node = cluster.add_node( num_cpus=1, _internal_config=internal_config) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) yield cluster, remote_node # The code after the yield will run as teardown code. diff --git a/python/ray/tests/test_actor.py b/python/ray/tests/test_actor.py index fcb560b28..a20edcee6 100644 --- a/python/ray/tests/test_actor.py +++ b/python/ray/tests/test_actor.py @@ -869,7 +869,7 @@ def test_actor_load_balancing(ray_start_cluster): num_nodes = 3 for i in range(num_nodes): cluster.add_node(num_cpus=1) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote class Actor1(object): @@ -916,7 +916,7 @@ def test_actor_lifetime_load_balancing(ray_start_cluster): num_nodes = 3 for i in range(num_nodes): cluster.add_node(num_cpus=1) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(num_cpus=1) class Actor(object): @@ -940,7 +940,7 @@ def test_actor_gpus(ray_start_cluster): for i in range(num_nodes): cluster.add_node( num_cpus=10 * num_gpus_per_raylet, num_gpus=num_gpus_per_raylet) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(num_gpus=1) class Actor1(object): @@ -979,7 +979,7 @@ def test_actor_multiple_gpus(ray_start_cluster): for i in range(num_nodes): cluster.add_node( num_cpus=10 * num_gpus_per_raylet, num_gpus=num_gpus_per_raylet) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(num_gpus=2) class Actor1(object): @@ -1049,7 +1049,7 @@ def test_actor_different_numbers_of_gpus(ray_start_cluster): cluster.add_node(num_cpus=10, num_gpus=0) cluster.add_node(num_cpus=10, num_gpus=5) cluster.add_node(num_cpus=10, num_gpus=10) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(num_gpus=1) class Actor1(object): @@ -1093,7 +1093,7 @@ def test_actor_multiple_gpus_from_multiple_tasks(ray_start_cluster): _internal_config=json.dumps({ "num_heartbeats_timeout": 1000 })) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote def create_actors(i, n): @@ -1168,7 +1168,7 @@ def test_actors_and_tasks_with_gpus(ray_start_cluster): for i in range(num_nodes): cluster.add_node( num_cpus=num_gpus_per_raylet, num_gpus=num_gpus_per_raylet) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) def check_intervals_non_overlapping(list_of_intervals): for i in range(len(list_of_intervals)): @@ -2034,7 +2034,7 @@ def test_custom_label_placement(ray_start_cluster): cluster = ray_start_cluster cluster.add_node(num_cpus=2, resources={"CustomResource1": 2}) cluster.add_node(num_cpus=2, resources={"CustomResource2": 2}) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(resources={"CustomResource1": 1}) class ResourceActor1(object): diff --git a/python/ray/tests/test_basic.py b/python/ray/tests/test_basic.py index 28cf90256..c63f93bc1 100644 --- a/python/ray/tests/test_basic.py +++ b/python/ray/tests/test_basic.py @@ -1235,7 +1235,7 @@ def test_wait_cluster(ray_start_cluster): cluster = ray_start_cluster cluster.add_node(num_cpus=1, resources={"RemoteResource": 1}) cluster.add_node(num_cpus=1, resources={"RemoteResource": 1}) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(resources={"RemoteResource": 1}) def f(): @@ -1263,7 +1263,7 @@ def test_object_transfer_dump(ray_start_cluster): num_nodes = 3 for i in range(num_nodes): cluster.add_node(resources={str(i): 1}, object_store_memory=10**9) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote def f(x): @@ -1534,7 +1534,7 @@ def test_free_objects_multi_node(ray_start_cluster): num_cpus=1, resources={"Custom{}".format(i): 1}, _internal_config=config) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) class RawActor(object): def get(self): @@ -1996,7 +1996,7 @@ def test_zero_cpus_actor(ray_start_cluster): cluster = ray_start_cluster cluster.add_node(num_cpus=0) cluster.add_node(num_cpus=2) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) local_plasma = ray.worker.global_worker.plasma_client.store_socket_name @@ -2069,7 +2069,7 @@ def test_multiple_raylets(ray_start_cluster): cluster.add_node(num_cpus=11, num_gpus=0) cluster.add_node(num_cpus=5, num_gpus=5) cluster.add_node(num_cpus=10, num_gpus=1) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) cluster.wait_for_nodes() # Define a bunch of remote functions that all return the socket name of @@ -2191,7 +2191,7 @@ def test_custom_resources(ray_start_cluster): cluster = ray_start_cluster cluster.add_node(num_cpus=3, resources={"CustomResource": 0}) cluster.add_node(num_cpus=3, resources={"CustomResource": 1}) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote def f(): @@ -2235,7 +2235,7 @@ def test_two_custom_resources(ray_start_cluster): "CustomResource1": 3, "CustomResource2": 4 }) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(resources={"CustomResource1": 1}) def f(): @@ -2468,7 +2468,7 @@ def test_load_balancing(ray_start_cluster): num_cpus = 7 for _ in range(num_nodes): cluster.add_node(num_cpus=num_cpus) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote def f(): @@ -2486,7 +2486,7 @@ def test_load_balancing_with_dependencies(ray_start_cluster): num_nodes = 3 for _ in range(num_nodes): cluster.add_node(num_cpus=1) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote def f(x): diff --git a/python/ray/tests/test_component_failures.py b/python/ray/tests/test_component_failures.py index e55793ab4..180230b5c 100644 --- a/python/ray/tests/test_component_failures.py +++ b/python/ray/tests/test_component_failures.py @@ -198,7 +198,7 @@ def ray_start_workers_separate_multinode(request): cluster = Cluster() for _ in range(num_nodes): cluster.add_node(num_cpus=num_initial_workers) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) yield num_nodes, num_initial_workers # The code after the yield will run as teardown code. diff --git a/python/ray/tests/test_credis.py b/python/ray/tests/test_credis.py index a64b4020b..455bcbab0 100644 --- a/python/ray/tests/test_credis.py +++ b/python/ray/tests/test_credis.py @@ -9,8 +9,8 @@ import ray def parse_client(addr_port_str): - redis_address, redis_port = addr_port_str.split(":") - return redis.StrictRedis(host=redis_address, port=redis_port) + address, redis_port = addr_port_str.split(":") + return redis.StrictRedis(host=address, port=redis_port) @unittest.skipIf(not os.environ.get("RAY_USE_NEW_GCS", False), diff --git a/python/ray/tests/test_dynres.py b/python/ray/tests/test_dynres.py index 0429e17b3..54cf186a3 100644 --- a/python/ray/tests/test_dynres.py +++ b/python/ray/tests/test_dynres.py @@ -86,7 +86,7 @@ def test_dynamic_res_updation_clientid(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) target_node_id = ray.nodes()[1]["NodeID"] @@ -120,7 +120,7 @@ def test_dynamic_res_creation_clientid(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) target_node_id = ray.nodes()[1]["NodeID"] @@ -150,7 +150,7 @@ def test_dynamic_res_creation_clientid_multiple(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) target_node_ids = [node["NodeID"] for node in ray.nodes()] @@ -191,7 +191,7 @@ def test_dynamic_res_deletion_clientid(ray_start_cluster): # target node cluster.add_node(resources={res_name: res_capacity}) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) target_node_id = ray.nodes()[1]["NodeID"] @@ -223,7 +223,7 @@ def test_dynamic_res_creation_scheduler_consistency(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] @@ -260,7 +260,7 @@ def test_dynamic_res_deletion_scheduler_consistency(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] @@ -313,7 +313,7 @@ def test_dynamic_res_concurrent_res_increment(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] target_node_id = node_ids[1] @@ -392,7 +392,7 @@ def test_dynamic_res_concurrent_res_decrement(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] target_node_id = node_ids[1] @@ -469,7 +469,7 @@ def test_dynamic_res_concurrent_res_delete(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] target_node_id = node_ids[1] @@ -538,7 +538,7 @@ def test_dynamic_res_creation_stress(ray_start_cluster): for i in range(num_nodes): cluster.add_node() - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) node_ids = [node["NodeID"] for node in ray.nodes()] target_node_id = node_ids[1] diff --git a/python/ray/tests/test_failure.py b/python/ray/tests/test_failure.py index e586902b8..4319c12c8 100644 --- a/python/ray/tests/test_failure.py +++ b/python/ray/tests/test_failure.py @@ -625,20 +625,18 @@ def test_warning_for_too_many_nested_tasks(shutdown_only): def test_redis_module_failure(ray_start_regular): address_info = ray_start_regular - redis_address = address_info["redis_address"] - redis_address = redis_address.split(":") - assert len(redis_address) == 2 + address = address_info["redis_address"] + address = address.split(":") + assert len(address) == 2 def run_failure_test(expecting_message, *command): with pytest.raises( Exception, match=".*{}.*".format(expecting_message)): - client = redis.StrictRedis( - host=redis_address[0], port=int(redis_address[1])) + client = redis.StrictRedis(host=address[0], port=int(address[1])) client.execute_command(*command) def run_one_command(*command): - client = redis.StrictRedis( - host=redis_address[0], port=int(redis_address[1])) + client = redis.StrictRedis(host=address[0], port=int(address[1])) client.execute_command(*command) run_failure_test("wrong number of arguments", "RAY.TABLE_ADD", 13) @@ -722,7 +720,7 @@ def test_connect_with_disconnected_node(shutdown_only): }) cluster = Cluster() cluster.add_node(num_cpus=0, _internal_config=config) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) info = relevant_errors(ray_constants.REMOVED_NODE_ERROR) assert len(info) == 0 # This node is killed by SIGKILL, ray_monitor will mark it to dead. diff --git a/python/ray/tests/test_monitors.py b/python/ray/tests/test_monitors.py index f80eb5e57..5b3c66627 100644 --- a/python/ray/tests/test_monitors.py +++ b/python/ray/tests/test_monitors.py @@ -25,7 +25,7 @@ def _test_cleanup_on_driver_exit(num_redis_shards): lines = [m.strip() for m in output.split("\n")] init_cmd = [m for m in lines if m.startswith("ray.init")] assert 1 == len(init_cmd) - redis_address = init_cmd[0].split("redis_address=\"")[-1][:-2] + address = init_cmd[0].split("address=\"")[-1][:-2] max_attempts_before_failing = 100 # Wait for monitor.py to start working. time.sleep(2) @@ -38,7 +38,7 @@ def _test_cleanup_on_driver_exit(num_redis_shards): def Driver(success): success.value = True # Start driver. - ray.init(redis_address=redis_address) + ray.init(address=address) summary_start = StateSummary() if (0, 1) != summary_start: success.value = False @@ -81,7 +81,7 @@ def _test_cleanup_on_driver_exit(num_redis_shards): # Just make sure Driver() is run and succeeded. assert success.value # Check that objects, tasks, and functions are cleaned up. - ray.init(redis_address=redis_address) + ray.init(address=address) attempts = 0 while (0, 1) != StateSummary(): time.sleep(0.1) diff --git a/python/ray/tests/test_multi_node.py b/python/ray/tests/test_multi_node.py index 446c34d80..411c7669c 100644 --- a/python/ray/tests/test_multi_node.py +++ b/python/ray/tests/test_multi_node.py @@ -14,9 +14,9 @@ from ray.tests.utils import (run_string_as_driver, def test_error_isolation(call_ray_start): - redis_address = call_ray_start + address = call_ray_start # Connect a driver to the Ray cluster. - ray.init(redis_address=redis_address) + ray.init(address=address) # There shouldn't be any errors yet. assert len(ray.errors()) == 0 @@ -48,7 +48,7 @@ def test_error_isolation(call_ray_start): import ray import time -ray.init(redis_address="{}") +ray.init(address="{}") time.sleep(1) assert len(ray.errors()) == 0 @@ -70,7 +70,7 @@ assert len(ray.errors()) == 1 assert "{}" in ray.errors()[0]["message"] print("success") -""".format(redis_address, error_string2, error_string2) +""".format(address, error_string2, error_string2) out = run_string_as_driver(driver_script) # Make sure the other driver succeeded. @@ -85,16 +85,16 @@ print("success") def test_remote_function_isolation(call_ray_start): # This test will run multiple remote functions with the same names in # two different drivers. Connect a driver to the Ray cluster. - redis_address = call_ray_start + address = call_ray_start - ray.init(redis_address=redis_address) + ray.init(address=address) # Start another driver and make sure that it can define and call its # own commands with the same names. driver_script = """ import ray import time -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def f(): return 3 @@ -105,7 +105,7 @@ for _ in range(10000): result = ray.get([f.remote(), g.remote(0, 0)]) assert result == [3, 4] print("success") -""".format(redis_address) +""".format(address) out = run_string_as_driver(driver_script) @@ -128,32 +128,32 @@ print("success") def test_driver_exiting_quickly(call_ray_start): # This test will create some drivers that submit some tasks and then # exit without waiting for the tasks to complete. - redis_address = call_ray_start + address = call_ray_start - ray.init(redis_address=redis_address) + ray.init(address=address) # Define a driver that creates an actor and exits. driver_script1 = """ import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote class Foo(object): def __init__(self): pass Foo.remote() print("success") -""".format(redis_address) +""".format(address) # Define a driver that creates some tasks and exits. driver_script2 = """ import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def f(): return 1 f.remote() print("success") -""".format(redis_address) +""".format(address) # Create some drivers and let them exit and make sure everything is # still alive. @@ -205,14 +205,14 @@ ray.get([a.log.remote(), f.remote()]) "call_ray_start", ["ray start --head --num-cpus=1 --num-gpus=1"], indirect=True) def test_drivers_release_resources(call_ray_start): - redis_address = call_ray_start + address = call_ray_start # Define a driver that creates an actor and exits. driver_script1 = """ import time import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def f(duration): @@ -237,7 +237,7 @@ foos = [Foo.remote() for _ in range(100)] [f.remote(10 ** 6) for _ in range(100)] print("success") -""".format(redis_address) +""".format(address) driver_script2 = (driver_script1 + "import sys\nsys.stdout.flush()\ntime.sleep(10 ** 6)\n") @@ -392,7 +392,7 @@ def test_calling_start_ray_head(): ], indirect=True) def test_using_hostnames(call_ray_start): - ray.init(node_ip_address="localhost", redis_address="localhost:6379") + ray.init(node_ip_address="localhost", address="localhost:6379") @ray.remote def f(): @@ -407,7 +407,7 @@ def test_connecting_in_local_case(ray_start_regular): # Define a driver that just connects to Redis. driver_script = """ import ray -ray.init(redis_address="{}") +ray.init(address="{}") print("success") """.format(address_info["redis_address"]) @@ -436,7 +436,7 @@ def train_func(config, reporter): # add a reporter arg reporter(timesteps_total=i, mean_accuracy=i+97) # report metrics os.environ["TUNE_RESUME_PROMPT_OFF"] = "True" -ray.init(redis_address="{}") +ray.init(address="{}") ray.tune.register_trainable("train_func", train_func) tune.run_experiments({{ @@ -463,16 +463,16 @@ print("success") def test_driver_exiting_when_worker_blocked(call_ray_start): # This test will create some drivers that submit some tasks and then # exit without waiting for the tasks to complete. - redis_address = call_ray_start + address = call_ray_start - ray.init(redis_address=redis_address) + ray.init(address=address) # Define a driver that creates two tasks, one that runs forever and the # other blocked on the first in a `ray.get`. driver_script = """ import time import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def f(): time.sleep(10**6) @@ -482,7 +482,7 @@ def g(): g.remote() time.sleep(1) print("success") -""".format(redis_address) +""".format(address) # Create some drivers and let them exit and make sure everything is # still alive. @@ -496,7 +496,7 @@ print("success") driver_script = """ import time import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def f(): time.sleep(10**6) @@ -506,7 +506,7 @@ def g(): g.remote() time.sleep(1) print("success") -""".format(redis_address) +""".format(address) # Create some drivers and let them exit and make sure everything is # still alive. @@ -520,7 +520,7 @@ print("success") driver_script_template = """ import time import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def g(x): return @@ -534,7 +534,7 @@ print("success") for _ in range(3): nonexistent_id_bytes = _random_string() nonexistent_id_hex = ray.utils.binary_to_hex(nonexistent_id_bytes) - driver_script = driver_script_template.format(redis_address, + driver_script = driver_script_template.format(address, nonexistent_id_hex) out = run_string_as_driver(driver_script) # Simulate the nonexistent dependency becoming available. @@ -547,7 +547,7 @@ print("success") driver_script_template = """ import time import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote def g(): ray.wait(ray.ObjectID(ray.utils.hex_to_binary("{}"))) @@ -561,7 +561,7 @@ print("success") for _ in range(3): nonexistent_id_bytes = _random_string() nonexistent_id_hex = ray.utils.binary_to_hex(nonexistent_id_bytes) - driver_script = driver_script_template.format(redis_address, + driver_script = driver_script_template.format(address, nonexistent_id_hex) out = run_string_as_driver(driver_script) # Simulate the nonexistent dependency becoming available. diff --git a/python/ray/tests/test_multi_node_2.py b/python/ray/tests/test_multi_node_2.py index b956529c6..b4f8b12e5 100644 --- a/python/ray/tests/test_multi_node_2.py +++ b/python/ray/tests/test_multi_node_2.py @@ -59,8 +59,8 @@ def test_internal_config(ray_start_cluster_head): assert ray.cluster_resources()["CPU"] == 1 -def setup_monitor(redis_address): - monitor = Monitor(redis_address, None) +def setup_monitor(address): + monitor = Monitor(address, None) monitor.subscribe(ray.gcs_utils.XRAY_HEARTBEAT_BATCH_CHANNEL) monitor.subscribe(ray.gcs_utils.XRAY_JOB_CHANNEL) # TODO: Remove? monitor.update_raylet_map(_append_port=True) @@ -113,7 +113,7 @@ def test_heartbeats_single(ray_start_cluster_head): """ cluster = ray_start_cluster_head timeout = 5 - monitor = setup_monitor(cluster.redis_address) + monitor = setup_monitor(cluster.address) total_cpus = ray.state.cluster_resources()["CPU"] verify_load_metrics(monitor, (0.0, {"CPU": 0.0}, {"CPU": total_cpus})) @@ -159,7 +159,7 @@ def test_heartbeats_cluster(ray_start_cluster_head): num_nodes_total = int(num_workers_nodes + 1) [cluster.add_node() for i in range(num_workers_nodes)] cluster.wait_for_nodes() - monitor = setup_monitor(cluster.redis_address) + monitor = setup_monitor(cluster.address) verify_load_metrics(monitor, (0.0, {"CPU": 0.0}, {"CPU": num_nodes_total})) diff --git a/python/ray/tests/test_node_manager.py b/python/ray/tests/test_node_manager.py index fc195d3a2..d382e49ec 100644 --- a/python/ray/tests/test_node_manager.py +++ b/python/ray/tests/test_node_manager.py @@ -16,7 +16,7 @@ def test_infeasible_tasks(ray_start_cluster): return cluster.add_node(resources={str(0): 100}) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) # Submit an infeasible task. x_id = f._submit(args=[], kwargs={}, resources={str(1): 1}) @@ -30,14 +30,14 @@ def test_infeasible_tasks(ray_start_cluster): driver_script = """ import ray -ray.init(redis_address="{}") +ray.init(address="{}") @ray.remote(resources={}) def f(): {}pass # This is a weird hack to insert some blank space. f.remote() -""".format(cluster.redis_address, "{str(2): 1}", " ") +""".format(cluster.address, "{str(2): 1}", " ") run_string_as_driver(driver_script) diff --git a/python/ray/tests/test_object_manager.py b/python/ray/tests/test_object_manager.py index a7cf48e09..5dd3a2a8a 100644 --- a/python/ray/tests/test_object_manager.py +++ b/python/ray/tests/test_object_manager.py @@ -28,7 +28,7 @@ def create_cluster(num_nodes): for i in range(num_nodes): cluster.add_node(resources={str(i): 100}, object_store_memory=10**9) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) return cluster @@ -226,7 +226,7 @@ def test_object_transfer_retry(ray_start_cluster): num_gpus=1, object_store_memory=object_store_memory, _internal_config=config) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) @ray.remote(num_gpus=1) def f(size): diff --git a/python/ray/tests/test_ray_init.py b/python/ray/tests/test_ray_init.py index cbeebc6e6..050215755 100644 --- a/python/ray/tests/test_ray_init.py +++ b/python/ray/tests/test_ray_init.py @@ -28,8 +28,8 @@ class TestRedisPassword(object): return 1 info = ray.init(redis_password=password) - redis_address = info["redis_address"] - redis_ip, redis_port = redis_address.split(":") + address = info["redis_address"] + redis_ip, redis_port = address.split(":") # Check that we can run a task object_id = f.remote() diff --git a/python/ray/tests/test_stress.py b/python/ray/tests/test_stress.py index 16309f146..036ccda98 100644 --- a/python/ray/tests/test_stress.py +++ b/python/ray/tests/test_stress.py @@ -49,7 +49,7 @@ def ray_start_combination(request): }) for i in range(num_nodes - 1): cluster.add_node(num_cpus=10) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) yield num_nodes, num_workers_per_scheduler, cluster # The code after the yield will run as teardown code. @@ -219,7 +219,7 @@ def ray_start_reconstruction(request): _internal_config=json.dumps({ "initial_reconstruction_timeout_milliseconds": 200 })) - ray.init(redis_address=cluster.redis_address) + ray.init(address=cluster.address) yield plasma_store_memory, num_nodes, cluster diff --git a/python/ray/tests/test_tempfile.py b/python/ray/tests/test_tempfile.py index 2ba51c167..9981dd526 100644 --- a/python/ray/tests/test_tempfile.py +++ b/python/ray/tests/test_tempfile.py @@ -14,7 +14,7 @@ def test_conn_cluster(): # plasma_store_socket_name with pytest.raises(Exception) as exc_info: ray.init( - redis_address="127.0.0.1:6379", + address="127.0.0.1:6379", plasma_store_socket_name="/tmp/this_should_fail") assert exc_info.value.args[0] == ( "When connecting to an existing cluster, " @@ -23,7 +23,7 @@ def test_conn_cluster(): # raylet_socket_name with pytest.raises(Exception) as exc_info: ray.init( - redis_address="127.0.0.1:6379", + address="127.0.0.1:6379", raylet_socket_name="/tmp/this_should_fail") assert exc_info.value.args[0] == ( "When connecting to an existing cluster, " @@ -31,8 +31,7 @@ def test_conn_cluster(): # temp_dir with pytest.raises(Exception) as exc_info: - ray.init( - redis_address="127.0.0.1:6379", temp_dir="/tmp/this_should_fail") + ray.init(address="127.0.0.1:6379", temp_dir="/tmp/this_should_fail") assert exc_info.value.args[0] == ( "When connecting to an existing cluster, " "temp_dir must not be provided.") diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py index a5a5bb4e0..e5807b509 100644 --- a/python/ray/tune/examples/async_hyperband_example.py +++ b/python/ray/tune/examples/async_hyperband_example.py @@ -51,10 +51,10 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") parser.add_argument( - "--ray-redis-address", + "--ray-address", help="Address of Ray cluster for seamless distributed execution.") args, _ = parser.parse_known_args() - ray.init(redis_address=args.ray_redis_address) + ray.init(address=args.ray_address) # asynchronous hyperband early stopping, configured with # `episode_reward_mean` as the diff --git a/python/ray/tune/examples/bohb_example.py b/python/ray/tune/examples/bohb_example.py index 4cd4169a2..802390fbe 100644 --- a/python/ray/tune/examples/bohb_example.py +++ b/python/ray/tune/examples/bohb_example.py @@ -19,7 +19,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") parser.add_argument( - "--ray-redis-address", + "--ray-address", help="Address of Ray cluster for seamless distributed execution.") args, _ = parser.parse_known_args() @@ -56,7 +56,7 @@ class MyTrainableClass(Trainable): if __name__ == "__main__": import ConfigSpace as CS - ray.init(redis_address=args.ray_redis_address) + ray.init(address=args.ray_address) # BOHB uses ConfigSpace for their hyperparameter search space config_space = CS.ConfigurationSpace() diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py index be64b977b..99cc38b27 100644 --- a/python/ray/tune/examples/mnist_pytorch.py +++ b/python/ray/tune/examples/mnist_pytorch.py @@ -116,11 +116,11 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") parser.add_argument( - "--ray-redis-address", + "--ray-address", help="Address of Ray cluster for seamless distributed execution.") args = parser.parse_args() - if args.ray_redis_address: - ray.init(redis_address=args.ray_redis_address) + if args.ray_address: + ray.init(address=args.ray_address) sched = AsyncHyperBandScheduler( time_attr="training_iteration", metric="mean_accuracy") analysis = tune.run( diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py index 65b756666..c10793dd9 100644 --- a/python/ray/tune/examples/mnist_pytorch_trainable.py +++ b/python/ray/tune/examples/mnist_pytorch_trainable.py @@ -25,7 +25,7 @@ parser.add_argument( default=False, help="enables CUDA training") parser.add_argument( - "--ray-redis-address", type=str, help="The Redis address of the cluster.") + "--ray-address", type=str, help="The Redis address of the cluster.") parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") @@ -64,7 +64,7 @@ class TrainMNIST(tune.Trainable): if __name__ == "__main__": args = parser.parse_args() - ray.init(redis_address=args.ray_redis_address) + ray.init(address=args.ray_address) sched = ASHAScheduler(metric="mean_accuracy") analysis = tune.run( TrainMNIST, diff --git a/python/ray/tune/tests/example.py b/python/ray/tune/tests/example.py index b6cb1f885..aa1ab7d43 100644 --- a/python/ray/tune/tests/example.py +++ b/python/ray/tune/tests/example.py @@ -6,9 +6,9 @@ # import ray # import argparse # parser = argparse.ArgumentParser() -# parser.add_argument("--redis-address") +# parser.add_argument("--address") # args = parser.parse_args() -# ray.init(redis_address=args.redis_address) +# ray.init(address=args.address) # __quick_start_begin__ import torch.optim as optim diff --git a/python/ray/tune/tests/test_cluster.py b/python/ray/tune/tests/test_cluster.py index 0eb8c8ab8..c11afa509 100644 --- a/python/ray/tune/tests/test_cluster.py +++ b/python/ray/tune/tests/test_cluster.py @@ -357,7 +357,7 @@ import time import ray from ray import tune -ray.init(redis_address="{redis_address}") +ray.init(address="{address}") tune.run( @@ -372,7 +372,7 @@ tune.run( dict(experiment=kwargs), raise_on_failed_trial=False) """.format( - redis_address=cluster.redis_address, checkpoint_dir=dirpath) + address=cluster.address, checkpoint_dir=dirpath) run_string_as_driver_nonblocking(script) # Wait until the right checkpoint is saved. # The trainable returns every 0.5 seconds, so this should not miss @@ -446,7 +446,7 @@ import time import ray from ray import tune -ray.init(redis_address="{redis_address}") +ray.init(address="{address}") {fail_class_code} @@ -460,7 +460,7 @@ tune.run( max_failures=1, raise_on_failed_trial=False) """.format( - redis_address=cluster.redis_address, + address=cluster.address, checkpoint_dir=dirpath, fail_class_code=reformatted, fail_class=_Mock.__name__) diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py index be5b5520b..1e3cad9ea 100644 --- a/python/ray/tune/tests/tutorial.py +++ b/python/ray/tune/tests/tutorial.py @@ -40,7 +40,7 @@ search_space = { } # Uncomment this to enable distributed execution -# `ray.init(redis_address=...)` +# `ray.init(address=...)` analysis = tune.run(train_mnist, config=search_space) # __eval_func_end__ diff --git a/python/ray/worker.py b/python/ray/worker.py index 73a046e50..011b73ee8 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -1298,8 +1298,8 @@ def _initialize_serialization(job_id, worker=global_worker): class_id="ray.signature.FunctionSignature") -def init(redis_address=None, - address=None, +def init(address=None, + redis_address=None, num_cpus=None, num_gpus=None, memory=None, @@ -1346,14 +1346,14 @@ def init(redis_address=None, .. code-block:: python - ray.init(redis_address="123.45.67.89:6379") + ray.init(address="123.45.67.89:6379") Args: - redis_address (str): The address of the Redis server to connect to. If + address (str): The address of the Ray cluster to connect to. If this address is not provided, then this command will start Redis, a raylet, a plasma store, a plasma manager, and some workers. It will also kill these processes when Python exits. - address (str): Same as redis_address. + redis_address (str): Deprecated; same as address. num_cpus (int): Number of cpus the user wishes all raylets to be configured with. num_gpus (int): Number of gpus the user wishes all raylets to diff --git a/rllib/train.py b/rllib/train.py index c726e7dc0..8e6b2b3bb 100755 --- a/rllib/train.py +++ b/rllib/train.py @@ -156,7 +156,7 @@ def run(args, parser): object_store_memory=args.ray_object_store_memory, memory=args.ray_memory, redis_max_memory=args.ray_redis_max_memory) - ray.init(address=cluster.redis_address) + ray.init(address=cluster.address) else: ray.init( address=args.ray_address,