From cc8f7db2464c683b5f54b40c017d5b7c71de784a Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Wed, 12 Dec 2018 10:40:54 -0800 Subject: [PATCH] [docs] Improve cluster/docker docs (#3517) - Surfaces local cluster usage - Increases visibility of these instructions - Removes some docker docs (that are really out of scope for Ray documentation IMO) Closes #3517. --- doc/source/autoscaling.rst | 45 ++- doc/source/index.rst | 18 +- doc/source/install-on-docker.rst | 12 +- .../using-ray-and-docker-on-a-cluster.md | 236 ------------- doc/source/using-ray-on-a-cluster.rst | 8 +- doc/source/using-ray-on-a-large-cluster.rst | 309 ------------------ .../autoscaler/local/development-example.yaml | 32 ++ python/ray/autoscaler/local/example-full.yaml | 7 +- 8 files changed, 90 insertions(+), 577 deletions(-) delete mode 100644 doc/source/using-ray-and-docker-on-a-cluster.md delete mode 100644 doc/source/using-ray-on-a-large-cluster.rst create mode 100644 python/ray/autoscaler/local/development-example.yaml diff --git a/doc/source/autoscaling.rst b/doc/source/autoscaling.rst index 90c8e92f3..64d57e0ef 100644 --- a/doc/source/autoscaling.rst +++ b/doc/source/autoscaling.rst @@ -1,7 +1,9 @@ -Cloud Setup and Auto-Scaling -============================ +Cluster Setup and Auto-Scaling +============================== -The ``ray up`` command starts or updates an AWS or GCP Ray cluster from your personal computer. Once the cluster is up, you can then SSH into it to run Ray programs. +This document provides instructions for launching a Ray cluster either privately, on AWS, or on GCP. + +The ``ray up`` command starts or updates a Ray cluster from your personal computer. Once the cluster is up, you can then SSH into it to run Ray programs. 
Quick start (AWS) ----------------- @@ -50,6 +52,28 @@ SSH into the head node and then run Ray programs with ``ray.init(redis_address=" # Teardown the cluster $ ray down ray/python/ray/autoscaler/gcp/example-full.yaml +Quick start (Private Cluster) +----------------------------- + +This is used when you have a list of machine IP addresses to connect in a Ray cluster. You can get started by filling out the fields in the provided `ray/python/ray/autoscaler/local/example-full.yaml `__. +Be sure to specify the proper ``head_ip``, list of ``worker_ips``, and the ``ssh_user`` field. + +Try it out by running these commands from your personal computer. Once the cluster is started, you can then +SSH into the head node and then run Ray programs with ``ray.init(redis_address="localhost:6379")``. + +.. code-block:: bash + + # Create or update the cluster. When the command finishes, it will print + # out the command that can be used to SSH into the cluster head node. + $ ray up ray/python/ray/autoscaler/local/example-full.yaml + + # Reconfigure autoscaling behavior without interrupting running jobs + $ ray up ray/python/ray/autoscaler/local/example-full.yaml \ + --max-workers=N --no-restart + + # Teardown the cluster + $ ray down ray/python/ray/autoscaler/local/example-full.yaml + Running commands on new and existing clusters --------------------------------------------- @@ -197,7 +221,8 @@ The ``example-full.yaml`` configuration is enough to get started with Ray, but f InstanceType: p2.8xlarge **Docker**: Specify docker image. This executes all commands on all nodes in the docker container, -and opens all the necessary ports to support the Ray cluster. This currently does not have GPU support. +and opens all the necessary ports to support the Ray cluster. It will also automatically install +Docker if Docker is not installed. This currently does not have GPU support. .. 
code-block:: yaml @@ -264,3 +289,15 @@ Additional Cloud providers -------------------------- To use Ray autoscaling on other Cloud providers or cluster management systems, you can implement the ``NodeProvider`` interface (~100 LOC) and register it in `node_provider.py `__. Contributions are welcome! + +Questions or Issues? +-------------------- + +You can post questions or issues or feedback through the following channels: + +1. `Our Mailing List`_: For discussions about development, questions about + usage, or any general questions and feedback. +2. `GitHub Issues`_: For bug reports and feature requests. + +.. _`Our Mailing List`: https://groups.google.com/forum/#!forum/ray-dev +.. _`GitHub Issues`: https://github.com/ray-project/ray/issues diff --git a/doc/source/index.rst b/doc/source/index.rst index 68a33676c..40f9283f4 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -32,7 +32,7 @@ Example Use | results = [f() for i in range(4)] | results = ray.get([f.remote() for i in range(4)]) | +------------------------------------------------+----------------------------------------------------+ - +To launch a Ray cluster, either privately, on AWS, or on GCP, `follow these instructions `_. View the `codebase on GitHub`_. @@ -67,6 +67,13 @@ Ray comes with libraries that accelerate deep learning and reinforcement learnin webui.rst async_api.rst +.. toctree:: + :maxdepth: 1 + :caption: Cluster Usage + + autoscaling.rst + using-ray-on-a-cluster.rst + .. toctree:: :maxdepth: 1 :caption: Tune @@ -124,15 +131,6 @@ Ray comes with libraries that accelerate deep learning and reinforcement learnin redis-memory-management.rst tempfile.rst -.. toctree:: - :maxdepth: 1 - :caption: Cluster Usage - - autoscaling.rst - using-ray-on-a-cluster.rst - using-ray-on-a-large-cluster.rst - using-ray-and-docker-on-a-cluster.md - .. 
toctree:: :maxdepth: 1 :caption: Help diff --git a/doc/source/install-on-docker.rst b/doc/source/install-on-docker.rst index 9fa245c16..6baa0363f 100644 --- a/doc/source/install-on-docker.rst +++ b/doc/source/install-on-docker.rst @@ -1,7 +1,7 @@ Installation on Docker ====================== -You can install Ray on any platform that runs Docker. We do not presently +You can install Ray from source on any platform that runs Docker. We do not presently publish Docker images for Ray, but you can build them yourself using the Ray distribution. @@ -25,6 +25,8 @@ the corresponding installation instructions. Linux user may find these Docker installation on EC2 with Ubuntu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. note:: The Ray `autoscaler `_ can automatically install Docker on all of the nodes of your cluster. + The instructions below show in detail how to prepare an Amazon EC2 instance running Ubuntu 16.04 for use with Docker. @@ -165,14 +167,6 @@ Launch the examples container. docker run --shm-size=1024m -t -i ray-project/examples -Hyperparameter optimization -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - cd /ray/examples/hyperopt/ - python /ray/examples/hyperopt/hyperopt_simple.py - Batch L-BFGS ~~~~~~~~~~~~ diff --git a/doc/source/using-ray-and-docker-on-a-cluster.md b/doc/source/using-ray-and-docker-on-a-cluster.md deleted file mode 100644 index 4e7b7a52d..000000000 --- a/doc/source/using-ray-and-docker-on-a-cluster.md +++ /dev/null @@ -1,236 +0,0 @@ -# Using Ray and Docker on a Cluster (Experimental) - -Packaging and deploying an application using Docker can provide certain advantages. It can make managing dependencies easier, help ensure that each cluster node receives a uniform configuration, and facilitate swapping hardware resources between applications. - - -## Create your Docker image - -First build a Ray Docker image by following the instructions for [Installation on Docker](install-on-docker.md). 
-This will allow you to create the `ray-project/deploy` image that serves as a basis for using Ray on a cluster with Docker. - -Docker images encapsulate the system state that will be used to run nodes in the cluster. -We recommend building on top of the Ray-provided Docker images to add your application code and dependencies. - -You can do this in one of two ways: by building from a customized Dockerfile or by saving an image after entering commands manually into a running container. -We describe both approaches below. - -### Creating a customized Dockerfile - -We recommend that you read the official Docker documentation for [Building your own image](https://docs.docker.com/engine/getstarted/step_four/) ahead of starting this section. -Your customized Dockerfile is a script of commands needed to set up your application, -possibly packaged in a folder with related resources. - -A simple template Dockerfile for a Ray application looks like this: - -``` -# Application Dockerfile template -FROM ray-project/deploy -RUN git clone -RUN -``` - -This file instructs Docker to load the image tagged `ray-project/deploy`, check out the git -repository at ``, and then run the script ``. - -Build the image by running something like: -``` -docker build -t . -``` -Replace `` with a tag of your choice. - - -### Creating a Docker image manually - -Launch the `ray-project/deploy` image interactively - -``` -docker run -t -i ray-project/deploy -``` - -Next, run whatever commands are needed to install your application. -When you are finished type `exit` to stop the container. - -Run -``` -docker ps -a -``` -to identify the id of the container you just exited. - -Next, commit the container -``` -docker commit -t -``` - -Replace `` with a name for your container and replace `` id with the hash id of the container used in configuration. 
- -## Publishing your Docker image to a repository - -When using Amazon EC2 it can be practical to publish images using the Repositories feature of Elastic Container Service. -Follow the steps below and see [documentation for creating a repository](http://docs.aws.amazon.com/AmazonECR/latest/userguide/repository-create.html) for additional context. - -First ensure that the AWS command-line interface is installed. - -``` -sudo apt-get install -y awscli -``` - -Next create a repository in Amazon's Elastic Container Registry. -This results in a shared resource for storing Docker images that will be accessible from all nodes. - - -``` -aws ecr create-repository --repository-name --region= -``` - -Replace `` with a string describing the application. -Replace `` with the AWS region string, e.g., `us-west-2`. -This should produce output like the following: - -``` -{ - "repository": { - "repositoryUri": "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-app", - "createdAt": 1487227244.0, - "repositoryArn": "arn:aws:ecr:us-west-2:123456789012:repository/my-app", - "registryId": "123456789012", - "repositoryName": "my-app" - } -} -``` - -Take note of the `repositoryUri` string, in this example `123456789012.dkr.ecr.us-west-2.amazonaws.com/my-app`. - - -Tag the Docker image with the repository URI. - -``` -docker tag -``` - -Replace the `` with the container name used previously and replace `` with URI returned by the command used to create the repository. - -Log into the repository: - -``` -eval $(aws ecr get-login --region ) -``` - -Replace `` with your selected AWS region. - -Push the image to the repository: -``` -docker push -``` -Replace `` with the URI of your repository. Now other hosts will be able to access your application Docker image. - - -## Starting a cluster - -We assume a cluster configuration like that described in instructions for [using Ray on a large cluster](using-ray-on-a-large-cluster.md). 
-In particular, we assume that there is a head node that has ssh access to all of the worker nodes, and that there is a file `workers.txt` listing the IP addresses of all worker nodes. - -### Install the Docker image on all nodes - -Create a script called `setup-docker.sh` on the head node. -``` -# setup-docker.sh -sudo apt-get install -y docker.io -sudo service docker start -sudo usermod -a -G docker ubuntu -exec sudo su -l ubuntu -eval $(aws ecr get-login --region ) -docker pull -``` - -Replace `` with the URI of the repository created in the previous section. -Replace `` with the AWS region in which you created that repository. -This script will install Docker, authenticate the session with the container registry, and download the container image from that registry. - -Run `setup-docker.sh` on the head node (if you used the head node to build the Docker image then you can skip this step): -``` -bash setup-docker.sh -``` - -Run `setup-docker.sh` on the worker nodes: -``` -parallel-ssh -h workers.txt -P -t 0 -I < setup-docker.sh -``` - -### Launch Ray cluster using Docker - -To start Ray on the head node run the following command: - -``` -eval $(aws ecr get-login --region ) -docker run \ - -d --shm-size= --net=host \ - \ - ray start --head \ - --object-manager-port=8076 \ - --redis-port=6379 \ - --num-workers= -``` - -Replace `` with the URI of the repository. -Replace `` with the region of the repository. -Replace `` with the number of workers, e.g., typically a number similar to the number of cores in the system. -Replace `` with the the amount of shared memory to make available within the Docker container, e.g., `8G`. 
- - -To start Ray on the worker nodes create a script `start-worker-docker.sh` with content like the following: -``` -eval $(aws ecr get-login --region ) -docker run -d --shm-size= --net=host \ - \ - ray start \ - --object-manager-port=8076 \ - --redis-address= \ - --num-workers= - -``` - -Replace `` with the string `:6379` where `` is the private network IP address of the head node. - -Execute the script on the worker nodes: -``` -parallel-ssh -h workers.txt -P -t 0 -I < setup-worker-docker.sh -``` - - -## Running jobs on a cluster - -On the head node, identify the id of the container that you launched as the Ray head. - -``` -docker ps -``` - -the container id appears in the first column of the output. - -Now launch an interactive shell within the container: - -``` -docker exec -t -i bash -``` - -Replace `` with the container id found in the previous step. - -Next, launch your application program. -The Python program should contain an initialization command that takes the Redis address as a parameter: - -``` -ray.init(redis_address="") -``` - - -## Shutting down a cluster - -Kill all running Docker images on the worker nodes: -``` -parallel-ssh -h workers.txt -P 'docker kill $(docker ps -q)' -``` - -Kill all running Docker images on the head node: -``` -docker kill $(docker ps -q) -``` diff --git a/doc/source/using-ray-on-a-cluster.rst b/doc/source/using-ray-on-a-cluster.rst index 611e47b79..2bc8b1cf6 100644 --- a/doc/source/using-ray-on-a-cluster.rst +++ b/doc/source/using-ray-on-a-cluster.rst @@ -3,12 +3,12 @@ Manual Cluster Setup .. note:: - If you're using AWS or GCP you should use the automated `setup commands `__. + If you're using AWS or GCP you should use the automated `setup commands `_. The instructions in this document work well for small clusters. For larger -clusters, follow the instructions for `managing a cluster with parallel ssh`_. 
+clusters, consider using the pssh package: ``sudo apt-get install pssh`` or +the `setup commands for private clusters `_. -.. _`managing a cluster with parallel ssh`: http://ray.readthedocs.io/en/latest/using-ray-on-a-large-cluster.html Deploying Ray on a Cluster -------------------------- @@ -32,7 +32,7 @@ If the ``--redis-port`` argument is omitted, Ray will choose a port at random. The command will print out the address of the Redis server that was started (and some other address information). -Then on all of the other nodes, run the following. Make sure to replace +**Then on all of the other nodes**, run the following. Make sure to replace ```` with the value printed by the command on the head node (it should look something like ``123.45.67.89:6379``). diff --git a/doc/source/using-ray-on-a-large-cluster.rst b/doc/source/using-ray-on-a-large-cluster.rst deleted file mode 100644 index b87c8c05f..000000000 --- a/doc/source/using-ray-on-a-large-cluster.rst +++ /dev/null @@ -1,309 +0,0 @@ -Manual Cluster Setup on a Large Cluster -======================================= - -.. note:: - - If you're using AWS or GCP you should use the automated `setup commands `__. - -Deploying Ray on a cluster requires a bit of manual work. The instructions here -illustrate how to use parallel ssh commands to simplify the process of running -commands and scripts on many machines simultaneously. - -Booting up a cluster on EC2 ---------------------------- - -* Create an EC2 instance running Ray following the `installation instructions`_. - - * Add any packages that you may need for running your application. - * Install the pssh package: ``sudo apt-get install pssh``. -* `Create an AMI`_ with Ray installed and with whatever code and libraries you - want on the cluster. -* Use the EC2 console to launch additional instances using the AMI you created. -* Configure the instance security groups so that they machines can all - communicate with one another. - -.. 
_`installation instructions`: http://ray.readthedocs.io/en/latest/installation.html -.. _`Create an AMI`: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/creating-an-ami-ebs.html - -Deploying Ray on a Cluster --------------------------- - -This section assumes that you have a cluster of machines running and that these -nodes have network connectivity to one another. It also assumes that Ray is -installed on each machine. - -Additional assumptions: - -* All of the following commands are run from a machine designated as - the **head node**. -* The head node will run Redis and the global scheduler. -* The head node has ssh access to all other nodes. -* All nodes are accessible via ssh keys -* Ray is checked out on each node at the location ``$HOME/ray``. - -**Note:** The commands below will probably need to be customized for your -specific setup. - -Connect to the head node -~~~~~~~~~~~~~~~~~~~~~~~~ - -In order to initiate ssh commands from the cluster head node we suggest enabling -ssh agent forwarding. This will allow the session that you initiate with the -head node to connect to other nodes in the cluster to run scripts on them. You -can enable ssh forwarding by running the following command before connecting to -the head node (replacing ```` with the path to the private key that you -would use when logging in to the nodes in the cluster). - -.. code-block:: bash - - ssh-add - -Now log in to the head node with the following command, where -```` is the public IP address of the head node (just choose -one of the nodes to be the head node). - -.. code-block:: bash - - ssh -A ubuntu@ - -Build a list of node IP addresses -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -On the head node, populate a file ``workers.txt`` with one IP address on each -line. Do not include the head node IP address in this file. These IP addresses -should typically be private network IP addresses, but any IP addresses which the -head node can use to ssh to worker nodes will work here. 
This should look -something like the following. - -.. code-block:: bash - - 172.31.27.16 - 172.31.29.173 - 172.31.24.132 - 172.31.29.224 - -Confirm that you can ssh to all nodes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. code-block:: bash - - for host in $(cat workers.txt); do - ssh -o "StrictHostKeyChecking no" $host uptime - done - -You may need to verify the host keys during this process. If so, run this step -again to verify that it worked. If you see a **permission denied** error, you -most likely forgot to run ``ssh-add `` before connecting to the head -node. - -Starting Ray -~~~~~~~~~~~~ - -**Start Ray on the head node** - -On the head node, run the following: - -.. code-block:: bash - - ray start --head --redis-port=6379 - - -**Start Ray on the worker nodes** - -Create a file ``start_worker.sh`` that contains something like the following: - -.. code-block:: bash - - # Make sure the SSH session has the correct version of Python on its path. - # You will probably have to change the line below. - export PATH=/home/ubuntu/anaconda3/bin/:$PATH - ray start --redis-address=:6379 - -This script, when run on the worker nodes, will start up Ray. You will need to -replace ```` with the IP address that worker nodes will use to -connect to the head node (most likely a **private IP address**). In this -example we also export the path to the Python installation since our remote -commands will not be executing in a login shell. - -**Warning:** You will probably need to manually export the correct path to -Python (you will need to change the first line of ``start_worker.sh`` to find -the version of Python that Ray was built against). This is necessary because the -``PATH`` environment variable used by ``parallel-ssh`` can differ from the -``PATH`` environment variable that gets set when you ``ssh`` to the machine. 
- -**Warning:** If the ``parallel-ssh`` command below appears to hang or otherwise -fails, ``head-node-ip`` may need to be a private IP address instead of a public -IP address (e.g., if you are using EC2). It's also possible that you forgot to -run ``ssh-add `` or that you forgot the ``-A`` flag when connecting to -the head node. - -Now use ``parallel-ssh`` to start up Ray on each worker node. - -.. code-block:: bash - - parallel-ssh -h workers.txt -P -I < start_worker.sh - -Note that on some distributions the ``parallel-ssh`` command may be called -``pssh``. - -**Verification** - -Now you have started all of the Ray processes on each node. These include: - -- Some worker processes on each machine. -- An object store on each machine. -- A local scheduler on each machine. -- Multiple Redis servers (on the head node). - -To confirm that the Ray cluster setup is working, start up Python on one of the -nodes in the cluster and enter the following commands to connect to the Ray -cluster. - -.. code-block:: python - - import ray - ray.init(redis_address="") - -Here ```` should have the form ``:6379``. - -Now you can define remote functions and execute tasks. For example, to verify -that the correct number of nodes have joined the cluster, you can run the -following. - -.. code-block:: python - - import time - - @ray.remote - def f(): - time.sleep(0.01) - return ray.services.get_node_ip_address() - - # Get a list of the IP addresses of the nodes that have joined the cluster. - set(ray.get([f.remote() for _ in range(1000)])) - - -Stopping Ray -~~~~~~~~~~~~ - -**Stop Ray on worker nodes** - -Create a file ``stop_worker.sh`` that contains something like the following: - -.. code-block:: bash - - # Make sure the SSH session has the correct version of Python on its path. - # You will probably have to change the line below. - export PATH=/home/ubuntu/anaconda3/bin/:$PATH - ray stop - -This script, when run on the worker nodes, will stop Ray. 
Note, you will need to -replace ``/home/ubuntu/anaconda3/bin/`` with the correct path to your Python -installation. - -Now use ``parallel-ssh`` to stop Ray on each worker node. - -.. code-block:: bash - - parallel-ssh -h workers.txt -P -I < stop_worker.sh - -**Stop Ray on the head node** - -.. code-block:: bash - - ray stop - -Upgrading Ray -~~~~~~~~~~~~~ - -Ray remains under active development so you may at times want to upgrade the -cluster to take advantage of improvements and fixes. - -**Create an upgrade script** - -On the head node, create a file called ``upgrade.sh`` that contains the commands -necessary to upgrade Ray. It should look something like the following: - -.. code-block:: bash - - # Make sure the SSH session has the correct version of Python on its path. - # You will probably have to change the line below. - export PATH=/home/ubuntu/anaconda3/bin/:$PATH - # Do pushd/popd to make sure we end up in the same directory. - pushd . - # Upgrade Ray. - cd ray - git checkout master - git pull - cd python - pip install -e . --verbose - popd - -This script executes a series of git commands to update the Ray source code, then builds -and installs Ray. - -**Stop Ray on the cluster** - -Follow the instructions for `Stopping Ray`_. - -**Run the upgrade script on the cluster** - -First run the upgrade script on the head node. This will upgrade the head node -and help confirm that the upgrade script is working properly. - -.. code-block:: bash - - bash upgrade.sh - -Next run the upgrade script on the worker nodes. - -.. code-block:: bash - - parallel-ssh -h workers.txt -P -t 0 -I < upgrade.sh - -Note here that we use the ``-t 0`` option to set the timeout to infinite. You -may also want to use the ``-p`` flag, which controls the degree of parallelism -used by parallel ssh. - -It is probably a good idea to ssh to one of the other nodes and verify that the -upgrade script ran as expected. 
- -Sync Application Files to other nodes -------------------------------------- - -If you are running an application that reads input files or uses python -libraries then you may find it useful to copy a directory on the head node to -the worker nodes. - -You can do this using the ``parallel-rsync`` command: - -.. code-block:: bash - - parallel-rsync -h workers.txt -r /home/ubuntu/ - -where ```` is the directory you want to synchronize. Note that the -destination argument for this command must represent an absolute path on the -worker node. - -Troubleshooting ---------------- - -Problems with parallel-ssh -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If any of the above commands fail, verify that the head node has SSH access to -the other nodes by running - -.. code-block:: bash - - for host in $(cat workers.txt); do - ssh $host uptime - done - -If you get a permission denied error, then make sure you have SSH'ed to the head -node with agent forwarding enabled. This is done as follows. - -.. code-block:: bash - - ssh-add - ssh -A ubuntu@ diff --git a/python/ray/autoscaler/local/development-example.yaml b/python/ray/autoscaler/local/development-example.yaml new file mode 100644 index 000000000..11f7c960f --- /dev/null +++ b/python/ray/autoscaler/local/development-example.yaml @@ -0,0 +1,32 @@ +cluster_name: default +min_workers: 0 +max_workers: 0 +docker: + image: "" + container_name: "" +target_utilization_fraction: 0.8 +idle_timeout_minutes: 5 +provider: + type: local + head_ip: YOUR_HEAD_NODE_HOSTNAME + worker_ips: [] +auth: + ssh_user: YOUR_USERNAME + ssh_private_key: ~/.ssh/id_rsa +head_node: {} +worker_nodes: {} +file_mounts: + "/tmp/ray_sha": "/YOUR/LOCAL/RAY/REPO/.git/refs/heads/YOUR_BRANCH" +setup_commands: [] +head_setup_commands: [] +worker_setup_commands: [] +setup_commands: + - source activate ray && test -e ray || git clone https://github.com/YOUR_GITHUB/ray.git + - source activate ray && cd ray && git fetch && git reset --hard `cat /tmp/ray_sha` +# - source activate 
ray && cd ray/python && pip install -e . +head_start_ray_commands: + - source activate ray && ray stop + - source activate ray && ulimit -c unlimited && ray start --head --redis-port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml +worker_start_ray_commands: + - source activate ray && ray stop + - source activate ray && ray start --redis-address=$RAY_HEAD_IP:6379 diff --git a/python/ray/autoscaler/local/example-full.yaml b/python/ray/autoscaler/local/example-full.yaml index 11f7c960f..88d20dadd 100644 --- a/python/ray/autoscaler/local/example-full.yaml +++ b/python/ray/autoscaler/local/example-full.yaml @@ -15,15 +15,12 @@ auth: ssh_private_key: ~/.ssh/id_rsa head_node: {} worker_nodes: {} -file_mounts: - "/tmp/ray_sha": "/YOUR/LOCAL/RAY/REPO/.git/refs/heads/YOUR_BRANCH" +file_mounts: {} setup_commands: [] head_setup_commands: [] worker_setup_commands: [] setup_commands: - - source activate ray && test -e ray || git clone https://github.com/YOUR_GITHUB/ray.git - - source activate ray && cd ray && git fetch && git reset --hard `cat /tmp/ray_sha` -# - source activate ray && cd ray/python && pip install -e . + - source activate ray && pip install -U ray head_start_ray_commands: - source activate ray && ray stop - source activate ray && ulimit -c unlimited && ray start --head --redis-port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml