mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 22:47:18 +08:00
Enable starting and stopping ray with "ray start" and "ray stop". (#628)
* Install start_ray and stop_ray scripts in setup.py.
* Update documentation.
* Fix Docker tests.
* Implement stop_ray script in Python.
* Fix linting.
This commit is contained in:
committed by
Philipp Moritz
parent
a4d8e13094
commit
1a682e2807
@@ -20,7 +20,7 @@ fi
|
||||
if [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "linux" ]]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake build-essential autoconf curl libtool python-dev python-numpy python-pip libboost-all-dev unzip
|
||||
sudo pip install cloudpickle funcsigs colorama psutil redis tensorflow flatbuffers
|
||||
sudo pip install cloudpickle funcsigs click colorama psutil redis tensorflow flatbuffers
|
||||
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake python-dev python-numpy build-essential autoconf curl libtool libboost-all-dev unzip
|
||||
@@ -28,7 +28,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "linux" ]]; then
|
||||
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
pip install numpy cloudpickle funcsigs colorama psutil redis tensorflow flatbuffers
|
||||
pip install numpy cloudpickle funcsigs click colorama psutil redis tensorflow flatbuffers
|
||||
elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
|
||||
# check that brew is installed
|
||||
which -s brew
|
||||
@@ -41,7 +41,7 @@ elif [[ "$PYTHON" == "2.7" ]] && [[ "$platform" == "macosx" ]]; then
|
||||
fi
|
||||
brew install cmake automake autoconf libtool boost
|
||||
sudo easy_install pip
|
||||
sudo pip install numpy cloudpickle funcsigs colorama psutil redis tensorflow flatbuffers --ignore-installed six
|
||||
sudo pip install numpy cloudpickle funcsigs click colorama psutil redis tensorflow flatbuffers --ignore-installed six
|
||||
elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
|
||||
# check that brew is installed
|
||||
which -s brew
|
||||
@@ -57,7 +57,7 @@ elif [[ "$PYTHON" == "3.5" ]] && [[ "$platform" == "macosx" ]]; then
|
||||
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
|
||||
bash miniconda.sh -b -p $HOME/miniconda
|
||||
export PATH="$HOME/miniconda/bin:$PATH"
|
||||
pip install numpy cloudpickle funcsigs colorama psutil redis tensorflow flatbuffers
|
||||
pip install numpy cloudpickle funcsigs click colorama psutil redis tensorflow flatbuffers
|
||||
elif [[ "$LINT" == "1" ]]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y cmake build-essential autoconf curl libtool libboost-all-dev unzip
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
colorama
|
||||
click
|
||||
cloudpickle
|
||||
funcsigs
|
||||
mock
|
||||
|
||||
@@ -17,7 +17,7 @@ To install Ray, first install the following dependencies. We recommend using
|
||||
brew update
|
||||
brew install cmake automake autoconf libtool boost wget
|
||||
|
||||
pip install numpy cloudpickle funcsigs colorama psutil redis flatbuffers --ignore-installed six
|
||||
pip install numpy cloudpickle funcsigs click colorama psutil redis flatbuffers --ignore-installed six
|
||||
|
||||
If you are using Anaconda, you may also need to run the following.
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ To install Ray, first install the following dependencies. We recommend using
|
||||
sudo apt-get install python-dev # For Python 2.
|
||||
sudo apt-get install python3-dev # For Python 3.
|
||||
|
||||
pip install numpy cloudpickle funcsigs colorama psutil redis flatbuffers
|
||||
pip install numpy cloudpickle funcsigs click colorama psutil redis flatbuffers
|
||||
|
||||
|
||||
If you are using Anaconda, you may also need to run the following.
|
||||
|
||||
@@ -165,7 +165,7 @@ eval $(aws ecr get-login --region <region>)
|
||||
docker run \
|
||||
-d --shm-size=<shm-size> --net=host \
|
||||
<repository-uri> \
|
||||
/ray/scripts/start_ray.sh --head \
|
||||
ray start --head \
|
||||
--object-manager-port=8076 \
|
||||
--redis-port=6379 \
|
||||
--num-workers=<num-workers>
|
||||
@@ -182,7 +182,7 @@ To start Ray on the worker nodes create a script `start-worker-docker.sh` with c
|
||||
eval $(aws ecr get-login --region <region>)
|
||||
docker run -d --shm-size=<shm-size> --net=host \
|
||||
<repository-uri> \
|
||||
/ray/scripts/start_ray.sh \
|
||||
ray start \
|
||||
--object-manager-port=8076 \
|
||||
--redis-address=<redis-address> \
|
||||
--num-workers=<num-workers>
|
||||
|
||||
@@ -24,7 +24,7 @@ If the ``--redis-port`` argument is omitted, Ray will choose a port at random.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./ray/scripts/start_ray.sh --head --redis-port=6379
|
||||
ray start --head --redis-port=6379
|
||||
|
||||
The command will print out the address of the Redis server that was started
|
||||
(and some other address information).
|
||||
@@ -35,7 +35,7 @@ should look something like ``123.45.67.89:6379``).
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./ray/scripts/start_ray.sh --redis-address=<redis-address>
|
||||
ray start --redis-address=<redis-address>
|
||||
|
||||
If you wish to specify that a machine has 10 CPUs and 1 GPU, you can do this
|
||||
with the flags ``--num-cpus=10`` and ``--num-gpus=1``. If these flags are not
|
||||
@@ -77,5 +77,4 @@ following.
|
||||
Stopping Ray
|
||||
~~~~~~~~~~~~
|
||||
|
||||
When you want to stop the Ray processes, run ``./ray/scripts/stop_ray.sh`` on
|
||||
each node.
|
||||
When you want to stop the Ray processes, run ``ray stop`` on each node.
|
||||
|
||||
@@ -102,7 +102,7 @@ On the head node, run the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
./ray/scripts/start_ray.sh --head --redis-port=6379
|
||||
ray start --head --redis-port=6379
|
||||
|
||||
|
||||
**Start Ray on the worker nodes**
|
||||
@@ -114,7 +114,7 @@ Create a file ``start_worker.sh`` that contains something like the following:
|
||||
# Make sure the SSH session has the correct version of Python on its path.
|
||||
# You will probably have to change the line below.
|
||||
export PATH=/home/ubuntu/anaconda3/bin/:$PATH
|
||||
ray/scripts/start_ray.sh --redis-address=<head-node-ip>:6379
|
||||
ray start --redis-address=<head-node-ip>:6379
|
||||
|
||||
This script, when run on the worker nodes, will start up Ray. You will need to
|
||||
replace ``<head-node-ip>`` with the IP address that worker nodes will use to
|
||||
@@ -186,18 +186,30 @@ Stopping Ray
|
||||
|
||||
**Stop Ray on worker nodes**
|
||||
|
||||
Create a file ``stop_worker.sh`` that contains something like the following:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
parallel-ssh -h workers.txt -P ray/scripts/stop_ray.sh
|
||||
# Make sure the SSH session has the correct version of Python on its path.
|
||||
# You will probably have to change the line below.
|
||||
export PATH=/home/ubuntu/anaconda3/bin/:$PATH
|
||||
ray stop
|
||||
|
||||
This command will execute the ``stop_ray.sh`` script on each of the worker
|
||||
nodes.
|
||||
This script, when run on the worker nodes, will stop Ray. Note, you will need to
|
||||
replace ``/home/ubuntu/anaconda3/bin/`` with the correct path to your Python
|
||||
installation.
|
||||
|
||||
Now use ``parallel-ssh`` to stop Ray on each worker node.
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
parallel-ssh -h workers.txt -P -I < stop_worker.sh
|
||||
|
||||
**Stop Ray on the head node**
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
ray/scripts/stop_ray.sh
|
||||
ray stop
|
||||
|
||||
Upgrading Ray
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
@@ -7,4 +7,3 @@ ADD git-rev /ray/git-rev
|
||||
WORKDIR /ray/python
|
||||
RUN python setup.py install
|
||||
WORKDIR /ray
|
||||
RUN echo "tail -f /dev/null" >> scripts/start_ray.sh
|
||||
|
||||
@@ -2,33 +2,12 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
import click
|
||||
import redis
|
||||
import subprocess
|
||||
|
||||
import ray.services as services
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Start the Ray processes on a node.")
|
||||
parser.add_argument("--node-ip-address", required=False, type=str,
|
||||
help="the IP address of this node")
|
||||
parser.add_argument("--redis-address", required=False, type=str,
|
||||
help="the address to use for connecting to Redis")
|
||||
parser.add_argument("--redis-port", required=False, type=str,
|
||||
help="the port to use for starting Redis")
|
||||
parser.add_argument("--num-redis-shards", required=False, type=int,
|
||||
help=("the number of additional Redis shards to use in "
|
||||
"addition to the primary Redis shard"))
|
||||
parser.add_argument("--object-manager-port", required=False, type=int,
|
||||
help="the port to use for starting the object manager")
|
||||
parser.add_argument("--num-workers", required=False, type=int,
|
||||
help="the initial number of workers to start on this node")
|
||||
parser.add_argument("--num-cpus", required=False, type=int,
|
||||
help="the number of CPUs on this node")
|
||||
parser.add_argument("--num-gpus", required=False, type=int,
|
||||
help="the number of GPUs on this node")
|
||||
parser.add_argument("--head", action="store_true",
|
||||
help="provide this argument for the head node")
|
||||
|
||||
|
||||
def check_no_existing_redis_clients(node_ip_address, redis_address):
|
||||
redis_ip_address, redis_port = redis_address.split(":")
|
||||
@@ -56,48 +35,72 @@ def check_no_existing_redis_clients(node_ip_address, redis_address):
|
||||
"with this IP address.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
# Root click command group; subcommands are attached to it further down with
# cli.add_command. A comment is used here rather than a docstring because
# click would display a docstring as the group's --help text.
@click.group()
def cli():
    return None
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--node-ip-address", required=False, type=str,
|
||||
help="the IP address of this node")
|
||||
@click.option("--redis-address", required=False, type=str,
|
||||
help="the address to use for connecting to Redis")
|
||||
@click.option("--redis-port", required=False, type=str,
|
||||
help="the port to use for starting Redis")
|
||||
@click.option("--num-redis-shards", required=False, type=int,
|
||||
help=("the number of additional Redis shards to use in "
|
||||
"addition to the primary Redis shard"))
|
||||
@click.option("--object-manager-port", required=False, type=int,
|
||||
help="the port to use for starting the object manager")
|
||||
@click.option("--num-workers", required=False, type=int,
|
||||
help="the initial number of workers to start on this node")
|
||||
@click.option("--num-cpus", required=False, type=int,
|
||||
help="the number of CPUs on this node")
|
||||
@click.option("--num-gpus", required=False, type=int,
|
||||
help="the number of GPUs on this node")
|
||||
@click.option("--head", is_flag=True, default=False,
|
||||
help="provide this argument for the head node")
|
||||
@click.option("--block", is_flag=True, default=False,
|
||||
help="provide this argument to block forever in this command")
|
||||
def start(node_ip_address, redis_address, redis_port, num_redis_shards,
|
||||
object_manager_port, num_workers, num_cpus, num_gpus, head, block):
|
||||
# Note that we redirect stdout and stderr to /dev/null because otherwise
|
||||
# attempts to print may cause exceptions if a process is started inside of an
|
||||
# SSH connection and the SSH connection dies. TODO(rkn): This is a temporary
|
||||
# fix. We should actually redirect stdout and stderr to Redis in some way.
|
||||
|
||||
if args.head:
|
||||
if head:
|
||||
# Start Ray on the head node.
|
||||
if args.redis_address is not None:
|
||||
if redis_address is not None:
|
||||
raise Exception("If --head is passed in, a Redis server will be "
|
||||
"started, so a Redis address should not be provided.")
|
||||
|
||||
# Get the node IP address if one is not provided.
|
||||
if args.node_ip_address is None:
|
||||
if node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address()
|
||||
else:
|
||||
node_ip_address = args.node_ip_address
|
||||
print("Using IP address {} for this node.".format(node_ip_address))
|
||||
|
||||
address_info = {}
|
||||
# Use the provided object manager port if there is one.
|
||||
if args.object_manager_port is not None:
|
||||
address_info["object_manager_ports"] = [args.object_manager_port]
|
||||
if object_manager_port is not None:
|
||||
address_info["object_manager_ports"] = [object_manager_port]
|
||||
if address_info == {}:
|
||||
address_info = None
|
||||
|
||||
address_info = services.start_ray_head(
|
||||
address_info=address_info,
|
||||
node_ip_address=node_ip_address,
|
||||
redis_port=args.redis_port,
|
||||
num_workers=args.num_workers,
|
||||
redis_port=redis_port,
|
||||
num_workers=num_workers,
|
||||
cleanup=False,
|
||||
redirect_output=True,
|
||||
num_cpus=args.num_cpus,
|
||||
num_gpus=args.num_gpus,
|
||||
num_redis_shards=args.num_redis_shards)
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus,
|
||||
num_redis_shards=num_redis_shards)
|
||||
print(address_info)
|
||||
print("\nStarted Ray on this node. You can add additional nodes to the "
|
||||
"cluster by calling\n\n"
|
||||
" ./scripts/start_ray.sh --redis-address {}\n\n"
|
||||
" ray start --redis-address {}\n\n"
|
||||
"from the node you wish to add. You can connect a driver to the "
|
||||
"cluster from Python by running\n\n"
|
||||
" import ray\n"
|
||||
@@ -105,43 +108,80 @@ if __name__ == "__main__":
|
||||
"If you have trouble connecting from a different machine, check "
|
||||
"that your firewall is configured properly. If you wish to "
|
||||
"terminate the processes that have been started, run\n\n"
|
||||
" ./scripts/stop_ray.sh".format(address_info["redis_address"],
|
||||
address_info["redis_address"]))
|
||||
" ray stop".format(address_info["redis_address"],
|
||||
address_info["redis_address"]))
|
||||
else:
|
||||
# Start Ray on a non-head node.
|
||||
if args.redis_port is not None:
|
||||
if redis_port is not None:
|
||||
raise Exception("If --head is not passed in, --redis-port is not "
|
||||
"allowed")
|
||||
if args.redis_address is None:
|
||||
if redis_address is None:
|
||||
raise Exception("If --head is not passed in, --redis-address must be "
|
||||
"provided.")
|
||||
if args.num_redis_shards is not None:
|
||||
if num_redis_shards is not None:
|
||||
raise Exception("If --head is not passed in, --num-redis-shards must "
|
||||
"not be provided.")
|
||||
redis_ip_address, redis_port = args.redis_address.split(":")
|
||||
redis_ip_address, redis_port = redis_address.split(":")
|
||||
# Wait for the Redis server to be started. And throw an exception if we
|
||||
# can't connect to it.
|
||||
services.wait_for_redis_to_start(redis_ip_address, int(redis_port))
|
||||
# Get the node IP address if one is not provided.
|
||||
if args.node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address(args.redis_address)
|
||||
else:
|
||||
node_ip_address = args.node_ip_address
|
||||
if node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address(redis_address)
|
||||
print("Using IP address {} for this node.".format(node_ip_address))
|
||||
# Check that there aren't already Redis clients with the same IP address
|
||||
# connected with this Redis instance. This raises an exception if the Redis
|
||||
# server already has clients on this node.
|
||||
check_no_existing_redis_clients(node_ip_address, args.redis_address)
|
||||
check_no_existing_redis_clients(node_ip_address, redis_address)
|
||||
address_info = services.start_ray_node(
|
||||
node_ip_address=node_ip_address,
|
||||
redis_address=args.redis_address,
|
||||
object_manager_ports=[args.object_manager_port],
|
||||
num_workers=args.num_workers,
|
||||
redis_address=redis_address,
|
||||
object_manager_ports=[object_manager_port],
|
||||
num_workers=num_workers,
|
||||
cleanup=False,
|
||||
redirect_output=True,
|
||||
num_cpus=args.num_cpus,
|
||||
num_gpus=args.num_gpus)
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus)
|
||||
print(address_info)
|
||||
print("\nStarted Ray on this node. If you wish to terminate the processes "
|
||||
"that have been started, run\n\n"
|
||||
" ./scripts/stop_ray.sh")
|
||||
" ray stop")
|
||||
|
||||
if block:
|
||||
import time
|
||||
while True:
|
||||
time.sleep(30)
|
||||
|
||||
|
||||
@click.command()
def stop():
    """Stop the Ray processes that were started on this node."""
    # Kill the Ray binaries by process name. The command is passed as one
    # string because that is the form subprocess expects with shell=True.
    subprocess.call("killall global_scheduler plasma_store plasma_manager "
                    "local_scheduler", shell=True)

    # The remaining processes are found by searching the output of "ps aux":
    # the monitor, the Redis server, the workers and the log monitor, in that
    # order.
    for pattern in ["monitor.py", "redis-server", "default_worker.py",
                    "log_monitor.py"]:
        # "grep -v grep" drops the processes that belong to this pipeline
        # itself (the grep and the shell running the command), whose command
        # lines also contain the pattern. Errors from kill, e.g. when nothing
        # matched, are discarded.
        subprocess.call("kill $(ps aux | grep {} | grep -v grep | "
                        "awk '{{ print $2 }}') 2> /dev/null".format(pattern),
                        shell=True)
|
||||
|
||||
|
||||
# Attach the subcommands to the command group.
cli.add_command(start)
cli.add_command(stop)


def main():
    """Run the command line interface and return whatever it returns."""
    outcome = cli()
    return outcome


if __name__ == "__main__":
    main()
|
||||
@@ -751,7 +751,7 @@ def get_address_info_from_redis(redis_address, node_ip_address, num_retries=5):
|
||||
# Some of the information may not be in Redis yet, so wait a little bit.
|
||||
print("Some processes that the driver needs to connect to have not "
|
||||
"registered with Redis, so retrying. Have you run "
|
||||
"./scripts/start_ray.sh on this node?")
|
||||
"'ray start' on this node?")
|
||||
time.sleep(1)
|
||||
counter += 1
|
||||
|
||||
|
||||
@@ -65,11 +65,13 @@ setup(name="ray",
|
||||
distclass=BinaryDistribution,
|
||||
install_requires=["numpy",
|
||||
"funcsigs",
|
||||
"click",
|
||||
"colorama",
|
||||
"psutil",
|
||||
"redis",
|
||||
"cloudpickle >= 0.2.2",
|
||||
"flatbuffers"],
|
||||
entry_points={"console_scripts": ["ray=ray.scripts.scripts:main"]},
|
||||
include_package_data=True,
|
||||
zip_safe=False,
|
||||
license="Apache 2.0")
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)
|
||||
|
||||
python "$ROOT_DIR/start_ray.py" "$@"
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
killall global_scheduler plasma_store plasma_manager local_scheduler
|
||||
|
||||
# Find the PID of the monitor process and kill it.
|
||||
kill $(ps aux | grep monitor.py | awk '{ print $2 }') 2> /dev/null
|
||||
|
||||
# Find the PID of the Redis process and kill it.
|
||||
kill $(ps aux | grep redis-server | awk '{ print $2 }') 2> /dev/null
|
||||
|
||||
# Find the PIDs of the worker processes and kill them.
|
||||
kill $(ps aux | grep default_worker.py | awk '{ print $2 }') 2> /dev/null
|
||||
|
||||
# Kill the processes related to the web UI.
|
||||
killall polymer
|
||||
|
||||
# Find the PID of the Ray UI backend process and kill it.
|
||||
kill $(ps aux | grep ray_ui.py | awk '{ print $2 }') 2> /dev/null
|
||||
|
||||
# Find the PID of the Ray log monitor process and kill it.
|
||||
kill $(ps aux | grep log_monitor.py | awk '{ print $2 }') 2> /dev/null
|
||||
@@ -112,7 +112,7 @@ class DockerRunner(object):
|
||||
if development_mode else [])
|
||||
|
||||
command = (["docker", "run", "-d"] + mem_arg + shm_arg + volume_arg +
|
||||
[docker_image, "/ray/scripts/start_ray.sh", "--head",
|
||||
[docker_image, "ray", "start", "--head", "--block",
|
||||
"--redis-port=6379",
|
||||
"--num-redis-shards={}".format(num_redis_shards),
|
||||
"--num-cpus={}".format(num_cpus),
|
||||
@@ -139,7 +139,7 @@ class DockerRunner(object):
|
||||
if development_mode else [])
|
||||
command = (["docker", "run", "-d"] + mem_arg + shm_arg + volume_arg +
|
||||
["--shm-size=" + shm_size, docker_image,
|
||||
"/ray/scripts/start_ray.sh",
|
||||
"ray", "start", "--block",
|
||||
"--redis-address={:s}:6379".format(self.head_container_ip),
|
||||
"--num-cpus={}".format(num_cpus),
|
||||
"--num-gpus={}".format(num_gpus)])
|
||||
|
||||
+20
-26
@@ -2,24 +2,18 @@ from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import os
|
||||
import unittest
|
||||
import ray
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
|
||||
start_ray_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"../scripts/start_ray.sh")
|
||||
stop_ray_script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
||||
"../scripts/stop_ray.sh")
|
||||
|
||||
|
||||
class MultiNodeTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
# Start the Ray processes on this machine.
|
||||
out = subprocess.check_output([start_ray_script, "--head"]).decode("ascii")
|
||||
out = subprocess.check_output(["ray", "start", "--head"]).decode("ascii")
|
||||
# Get the redis address from the output.
|
||||
redis_substring_prefix = "redis_address=\""
|
||||
redis_address_location = (out.find(redis_substring_prefix) +
|
||||
@@ -29,7 +23,7 @@ class MultiNodeTest(unittest.TestCase):
|
||||
|
||||
def tearDown(self):
|
||||
# Kill the Ray cluster.
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
def testErrorIsolation(self):
|
||||
# Connect a driver to the Ray cluster.
|
||||
@@ -162,53 +156,53 @@ class StartRayScriptTest(unittest.TestCase):
|
||||
# the non-head node code path.
|
||||
|
||||
# Test starting Ray with no arguments.
|
||||
subprocess.check_output([start_ray_script, "--head"]).decode("ascii")
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.check_output(["ray", "start", "--head"]).decode("ascii")
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with a number of workers specified.
|
||||
subprocess.check_output([start_ray_script, "--head", "--num-workers",
|
||||
subprocess.check_output(["ray", "start", "--head", "--num-workers",
|
||||
"20"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with a redis port specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--redis-port", "6379"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with a node IP address specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--node-ip-address", "127.0.0.1"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with an object manager port specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--object-manager-port", "12345"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with the number of CPUs specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--num-cpus", "100"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with the number of GPUs specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--num-gpus", "100"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with all arguments specified.
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--num-workers", "20",
|
||||
"--redis-port", "6379",
|
||||
"--object-manager-port", "12345",
|
||||
"--num-cpus", "100",
|
||||
"--num-gpus", "0"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
# Test starting Ray with invalid arguments.
|
||||
with self.assertRaises(Exception):
|
||||
subprocess.check_output([start_ray_script, "--head",
|
||||
subprocess.check_output(["ray", "start", "--head",
|
||||
"--redis-address", "127.0.0.1:6379"])
|
||||
subprocess.Popen([stop_ray_script]).wait()
|
||||
subprocess.Popen(["ray", "stop"]).wait()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user