diff --git a/ci/long_running_distributed_tests/ray-project/project.yaml b/ci/long_running_distributed_tests/ray-project/project.yaml deleted file mode 100644 index 09f6e8f53..000000000 --- a/ci/long_running_distributed_tests/ray-project/project.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# This file is generated by `ray project create`. - -name: long-running-distributed-tests - -cluster: - config: ray-project/cluster.yaml - params: - - name: ray_version # Ray version string. - default: "0.8.2" - - - name: commit # Ray commit SHA string. - default: "f5a1307a608fe5fdbdb04616b22c91f029af329a" - - - name: ray_branch - default: "releases/0.8.2" - -commands: - - name: test_workload - help: "Start a long running distributed test." - command: | - python workloads/{{workload}}.py - params: - - name: workload - help: "Name of workload to run." - choices: - [ - "pytorch_pbt_failure" - ] - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [ - # For example, uncomment this to save the logs from the - # last ray job. - # "/tmp/ray/session_latest", -] diff --git a/ci/long_running_distributed_tests/run.sh b/ci/long_running_distributed_tests/run.sh deleted file mode 100644 index d9edef56f..000000000 --- a/ci/long_running_distributed_tests/run.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash - -ray_version="" -commit="" -ray_branch="" -workload="" - -usage() { - echo "Start one microbenchmark trial." 
-} - -for i in "$@" -do -echo "$i" -case "$i" in - --ray-version=*) - ray_version="${i#*=}" - - ;; - --commit=*) - commit="${i#*=}" - ;; - --ray-branch=*) - ray_branch="${i#*=}" - ;; - --workload=*) - workload="${i#*=}" - ;; - --help) - usage - exit - ;; - *) - echo "unknown arg, $i" - exit 1 - ;; -esac -done - -echo "version: $ray_version" -echo "commit: $commit" -echo "branch: $ray_branch" -echo "workload: $workload" - -python "workloads/$workload.py" diff --git a/ci/long_running_tests/ray-project/project.yaml b/ci/long_running_tests/ray-project/project.yaml deleted file mode 100644 index c23a3f5cf..000000000 --- a/ci/long_running_tests/ray-project/project.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: long-running-tests -description: "Ray's long running stress tests" - -cluster: - config: ray-project/cluster.yaml - -commands: - - name: test_workload - help: "Start a long running stress test." - command: | - # Install nightly Ray wheels. - source activate tensorflow_p36 && pip install -q -U {{wheel}} Click - source activate tensorflow_p36 && pip install -q ray[all] gym[atari] - source activate tensorflow_p36 && python workloads/{{workload}}.py - params: - - name: wheel - help: "URL to the ray wheel to test (defaults to latest)." - default: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp36-cp36m-manylinux1_x86_64.whl - - name: workload - help: "Name of the workload to run." - choices: - [ - "actor_deaths", - "apex", - "impala", - "many_actor_tasks", - "many_drivers", - "many_tasks", - "node_failures", - "pbt", - "serve", - "serve_failure", - "many_tasks_serialized_ids", - ] - config: - tmux: true - - - name: check-load - command: uptime - help: "Check load of the workload." - - - name: show-output - command: tmux capture-pane -p - help: "Show tail of the workoad output." - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. 
Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [] -# For example, uncomment this to save the logs from the -# last ray job. -# "/tmp/ray/session_latest", diff --git a/ci/microbenchmark/ray-project/project.yaml b/ci/microbenchmark/ray-project/project.yaml deleted file mode 100644 index 8e9c737d3..000000000 --- a/ci/microbenchmark/ray-project/project.yaml +++ /dev/null @@ -1,39 +0,0 @@ -name: microbenchmark -description: "Ray's microbenchmark" - -cluster: - config: ray-project/cluster.yaml - -commands: - - name: run - help: "Start one microbenchmark trial." - command: | - rm ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl || true - wget https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - pip uninstall -y -q ray - pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - OMP_NUM_THREADS=64 ray microbenchmark - params: - - name: ray_version # Ray version string. - default: "1.1.0.dev0" - - - name: commit # Ray commit SHA string. - default: "FILL ME IN" - - - name: ray_branch - default: "master" - config: - tmux: true - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [ - # For example, uncomment this to save the logs from the - # last ray job. 
- # "/tmp/ray/session_latest", -] \ No newline at end of file diff --git a/ci/regression_test/stress_tests/ray-project/project.yaml b/ci/regression_test/stress_tests/ray-project/project.yaml deleted file mode 100644 index 2324eacdb..000000000 --- a/ci/regression_test/stress_tests/ray-project/project.yaml +++ /dev/null @@ -1,37 +0,0 @@ -# This file is generated by `ray project create`. - -name: ray_stress_tests - -cluster: - config: ray-project/cluster.yaml - params: - - name: ray_version # Ray version string. - default: "0.8.2" - - - name: commit # Ray commit SHA string. - default: "f5a1307a608fe5fdbdb04616b22c91f029af329a" - - - name: ray_branch - default: "releases/0.8.2" - -commands: - - name: test_many_tasks - help: "Run a stress test that executes many tasks." - command: | - python test_many_tasks.py - - - name: test_dead_actors - help: "Run a stress test that spawns and kills many actors." - command: | - python test_dead_actors.py - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [ - # For example, uncomment this to save the logs from the - # last ray job. 
- # "/tmp/ray/session_latest", -] diff --git a/ci/regression_test/stress_tests/ray-project/requirements.txt b/ci/regression_test/stress_tests/ray-project/requirements.txt deleted file mode 100644 index 0f026d879..000000000 --- a/ci/regression_test/stress_tests/ray-project/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -ray[debug] \ No newline at end of file diff --git a/ci/regression_test/stress_tests/run.sh b/ci/regression_test/stress_tests/run.sh deleted file mode 100644 index e56b2b8e5..000000000 --- a/ci/regression_test/stress_tests/run.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash - -ray_version="" -commit="" -ray_branch="" -workload="" - -usage() { - echo "Start one microbenchmark trial." -} - -for i in "$@" -do -echo "$i" -case "$i" in - --ray-version=*) - ray_version="${i#*=}" - - ;; - --commit=*) - commit="${i#*=}" - ;; - --ray-branch=*) - ray_branch="${i#*=}" - ;; - --workload=*) - workload="${i#*=}" - ;; - --help) - usage - exit - ;; - *) - echo "unknown arg, $i" - exit 1 - ;; -esac -done - -echo "version: $ray_version" -echo "commit: $commit" -echo "branch: $ray_branch" -echo "workload: $workload" - -python "$workload.py" diff --git a/ci/rllib_tests/regresssion_tests/ray-project/project.yaml b/ci/rllib_tests/regresssion_tests/ray-project/project.yaml deleted file mode 100644 index b9f1273ed..000000000 --- a/ci/rllib_tests/regresssion_tests/ray-project/project.yaml +++ /dev/null @@ -1,57 +0,0 @@ -# This file is generated by `ray project create`. - -name: rllib_regression_tests - -# description: A short description of the project. -# The URL of the repo this project is part of. -# repo: ... - -cluster: - config: ray-project/cluster.yaml - params: - - name: ray_version # Ray version string. - default: "0.8.2" - - - name: commit # Ray commit SHA string. - default: "f5a1307a608fe5fdbdb04616b22c91f029af329a" - - - name: ray_branch - default: "releases/0.8.2" - - -environment: - # dockerfile: The dockerfile to be built and ran the commands with. 
- # dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04. - requirements: ray-project/requirements.txt - - shell: # Shell commands to be ran for environment setup. - - echo "Setting up the environment" - -commands: - - name: check-load - command: uptime - help: "Check load of the workload." - - - name: check-gpu - command: nvidia-smi - help: "Check load of the gpu." - - - name: show-output - command: tmux capture-pane -p - help: "Show tail of the workoad output." - - - name: run-regression-tests - command: | - source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml - help: "Run rllib regression tests" - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [ - # For example, uncomment this to save the logs from the - # last ray job. - # "/tmp/ray/session_latest", -] diff --git a/ci/rllib_tests/regresssion_tests/run.sh b/ci/rllib_tests/regresssion_tests/run.sh deleted file mode 100644 index f6ecb5454..000000000 --- a/ci/rllib_tests/regresssion_tests/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash - -source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml diff --git a/ci/rllib_tests/stress_tests/ray-project/project.yaml b/ci/rllib_tests/stress_tests/ray-project/project.yaml deleted file mode 100644 index a9ce5488b..000000000 --- a/ci/rllib_tests/stress_tests/ray-project/project.yaml +++ /dev/null @@ -1,52 +0,0 @@ -# This file is generated by `ray project create`. - -name: rllib_stress_tests - -# description: A short description of the project. -# The URL of the repo this project is part of. -# repo: ... - -cluster: - config: ray-project/cluster.yaml - params: - - name: ray_version # Ray version string. 
- default: "0.8.2" - - - name: commit # Ray commit SHA string. - default: "f5a1307a608fe5fdbdb04616b22c91f029af329a" - - - name: ray_branch - default: "releases/0.8.2" - - -environment: - # dockerfile: The dockerfile to be built and ran the commands with. - # dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04. - requirements: ray-project/requirements.txt - - shell: # Shell commands to be ran for environment setup. - - echo "Setting up the environment" - -commands: - - name: check-load - command: uptime - help: "Check load of the workload." - - - name: show-output - command: tmux capture-pane -p - help: "Show tail of the workoad output." - - - name: run-impala - command: bash run.sh - help: "Run impala stress test" - -# Pathnames for files and directories that should be saved -# in a snapshot but that should not be synced with a# session. Pathnames can be relative to the project -# directory or absolute. Generally, this should be files -# that were created by an active session, such as -# application checkpoints and logs. -output_files: [ - # For example, uncomment this to save the logs from the - # last ray job. - # "/tmp/ray/session_latest", -] \ No newline at end of file diff --git a/ci/rllib_tests/stress_tests/run.sh b/ci/rllib_tests/stress_tests/run.sh deleted file mode 100644 index 08af29763..000000000 --- a/ci/rllib_tests/stress_tests/run.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash - -source activate tensorflow_p36 - -python3 wait_cluster.py - -rllib train -f atari_impala_xlarge.yaml --ray-address=auto --queue-trials \ No newline at end of file diff --git a/ci/rllib_tests/unit_gpu_tests/run.sh b/ci/rllib_tests/unit_gpu_tests/run.sh deleted file mode 100644 index 57024310f..000000000 --- a/ci/rllib_tests/unit_gpu_tests/run.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -# Run all test cases, but with a forced num_gpus=1. -# TODO: (sven) chose correct dir and run over all RLlib tests and example scripts! 
-source activate tensorflow_p36 && export RAY_FORCE_NUM_GPUS=1 && cd ~ && python -m pytest test_attention_net_learning.py diff --git a/release/1.0.0.env b/release/1.0.0.env new file mode 100644 index 000000000..26eed1dd5 --- /dev/null +++ b/release/1.0.0.env @@ -0,0 +1,2 @@ +export ray_version="1.0.0rc1" +export commit=fd5ddb661e659c2b0c968661d96d0405426912e5 diff --git a/release/README.md b/release/README.md new file mode 100644 index 000000000..77c79e9e7 --- /dev/null +++ b/release/README.md @@ -0,0 +1,2 @@ +Running the kickoff script: + diff --git a/release/RELEASE_PROCESS.rst b/release/RELEASE_PROCESS.rst new file mode 120000 index 000000000..abd0e53fa --- /dev/null +++ b/release/RELEASE_PROCESS.rst @@ -0,0 +1 @@ +../doc/dev/RELEASE_PROCESS.rst \ No newline at end of file diff --git a/release/kickoff.sh b/release/kickoff.sh new file mode 100755 index 000000000..67e452478 --- /dev/null +++ b/release/kickoff.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Usage: kickoff.sh WORKLOAD ENV_FILE -- ENV_FILE is sourced and must set ray_version and commit; ray_branch is forwarded to run.sh when it is set. + +source "$2" + +ray_version=${ray_version:-} +commit=${commit:-} + +if [[ $ray_version == "" || $commit == "" || $1 == "" ]] +then + echo "Usage: kickoff.sh WORKLOAD ENV_FILE (ENV_FILE must set ray_version and commit)" + exit 1 +fi + +echo "version: $ray_version" +echo "commit: $commit" +echo "workload: $1" + +DATESTR=$(date +%Y%m%d-%H%M) +SESSION="$1-$DATESTR" + +echo "session: $SESSION" + +chmod +x ./run.sh +if [ -z "$NO_UP" ]; then + anyscale up "$SESSION" +fi +anyscale push "$SESSION" +anyscale exec -n "$SESSION" "./run.sh --workload=$1 --ray-version=$ray_version --commit=$commit --ray-branch=${ray_branch:-}" diff --git a/ci/long_running_distributed_tests/README.rst b/release/long_running_distributed_tests/README.rst similarity index 100% rename from ci/long_running_distributed_tests/README.rst rename to release/long_running_distributed_tests/README.rst diff --git a/ci/long_running_distributed_tests/ray-project/cluster.yaml b/release/long_running_distributed_tests/cluster.yaml similarity index 74% rename from ci/long_running_distributed_tests/ray-project/cluster.yaml rename to 
release/long_running_distributed_tests/cluster.yaml index 4aa655cce..a2ed252a4 100644 --- a/ci/long_running_distributed_tests/ray-project/cluster.yaml +++ b/release/long_running_distributed_tests/cluster.yaml @@ -58,23 +58,7 @@ worker_nodes: InstanceMarketOptions: MarketType: spot -setup_commands: - # Install ray. - - conda uninstall -y terminado || true - - pip install -U pip - - pip install terminado - - wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - ray || pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl || true - # Installing this without -U to make sure we don't replace the existing Ray installation - - pip install ray[rllib] - - pip install -U ipdb - # There have been some recent problems with torch 1.5 and torchvision 0.6 - # not recognizing GPUs. - # So, we force install torch 1.4 and torchvision 0.5. - # https://github.com/pytorch/pytorch/issues/37212#issuecomment-623198624. - - pip install torch==1.4.0 torchvision==0.5.0 - - echo set-window-option -g mouse on > ~/.tmux.conf - - echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc +setup_commands: [] # Command to start ray on the head node. You don't need to change this. head_start_ray_commands: diff --git a/release/long_running_distributed_tests/run.sh b/release/long_running_distributed_tests/run.sh new file mode 100755 index 000000000..a2b7070f8 --- /dev/null +++ b/release/long_running_distributed_tests/run.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +ray_version="" +commit="" +ray_branch="" +workload="" + +usage() { + echo "Start a long running distributed test." 
+} + +for i in "$@" +do +echo "$i" +case "$i" in + --ray-version=*) + ray_version="${i#*=}" + + ;; + --commit=*) + commit="${i#*=}" + ;; + --ray-branch=*) + ray_branch="${i#*=}" + ;; + --workload=*) + workload="${i#*=}" + ;; + --help) + usage + exit + ;; + *) + echo "unknown arg, $i" + exit 1 + ;; +esac +done + +echo "version: $ray_version" +echo "commit: $commit" +echo "branch: $ray_branch" +echo "workload: $workload" + +wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" + +conda uninstall -y terminado || true +pip install -U pip +pip install terminado +pip install -U "$wheel" +pip install "ray[rllib]" +pip install -U ipdb +# There have been some recent problems with torch 1.5 and torchvision 0.6 +# not recognizing GPUs. +# So, we force install torch 1.4 and torchvision 0.5. +# https://github.com/pytorch/pytorch/issues/37212#issuecomment-623198624. +pip install torch==1.4.0 torchvision==0.5.0 +echo set-window-option -g mouse on > ~/.tmux.conf +echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc + +python "workloads/$workload.py" diff --git a/ci/long_running_distributed_tests/workloads/pytorch_pbt_failure.py b/release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py similarity index 100% rename from ci/long_running_distributed_tests/workloads/pytorch_pbt_failure.py rename to release/long_running_distributed_tests/workloads/pytorch_pbt_failure.py diff --git a/ci/long_running_tests/.gitignore b/release/long_running_tests/.gitignore similarity index 100% rename from ci/long_running_tests/.gitignore rename to release/long_running_tests/.gitignore diff --git a/ci/long_running_tests/README.rst b/release/long_running_tests/README.rst similarity index 100% rename from ci/long_running_tests/README.rst rename to release/long_running_tests/README.rst diff --git a/ci/long_running_tests/ray-project/cluster.yaml b/release/long_running_tests/cluster.yaml similarity index 57% rename from 
ci/long_running_tests/ray-project/cluster.yaml rename to release/long_running_tests/cluster.yaml index 2687780d4..074d445ba 100644 --- a/ci/long_running_tests/ray-project/cluster.yaml +++ b/release/long_running_tests/cluster.yaml @@ -33,22 +33,7 @@ worker_nodes: MarketType: spot # List of shell commands to run to set up nodes. -setup_commands: - # Install latest TensorFlow - - source activate tensorflow_p36 && conda remove -y --force wrapt || true - - source activate tensorflow_p36 && pip install --upgrade pip - - source activate tensorflow_p36 && pip install -U tensorflow==1.14 - - echo set-window-option -g mouse on > ~/.tmux.conf - - echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc - # Serve load testing tool - - 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && sudo cp wrk /usr/local/bin' - - # Uncomment the following if you wish to build Ray instead. - # - sudo apt-get update - # - sudo apt-get install -y build-essential curl unzip - # - git clone https://github.com/ray-project/ray || true - # - ray/ci/travis/install-bazel.sh - # - cd ray/python; git checkout master; git pull; source activate tensorflow_p36 && pip install -e . --verbose +setup_commands: [] # Custom commands that will be run on the head node after common setup. 
head_setup_commands: [] diff --git a/ci/long_running_tests/run.sh b/release/long_running_tests/run.sh similarity index 65% rename from ci/long_running_tests/run.sh rename to release/long_running_tests/run.sh index 86e81bc8f..e9288de1c 100644 --- a/ci/long_running_tests/run.sh +++ b/release/long_running_tests/run.sh @@ -50,9 +50,17 @@ echo "workload: $workload" wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" +echo set-window-option -g mouse on > ~/.tmux.conf +echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc +# Serve load testing tool; built inside a subshell so the working directory is unchanged for the workload launch below. +rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && (cd wrk && make -j && sudo cp wrk /usr/local/bin) pip install -U pip unset RAY_ADDRESS -source activate tensorflow_p36 && pip install -q -U "$wheel" Click -source activate tensorflow_p36 && pip install -q "ray[all]" "gym[atari]" -source activate tensorflow_p36 && python "workloads/$workload.py" +source activate tensorflow_p36 +conda remove -y --force wrapt || true +pip install --upgrade pip +pip install -U tensorflow==1.14 +pip install -q -U "$wheel" Click +pip install -q "ray[all]" "gym[atari]" +python "workloads/$workload.py" diff --git a/ci/long_running_tests/workloads/actor_deaths.py b/release/long_running_tests/workloads/actor_deaths.py similarity index 100% rename from ci/long_running_tests/workloads/actor_deaths.py rename to release/long_running_tests/workloads/actor_deaths.py diff --git a/ci/long_running_tests/workloads/apex.py b/release/long_running_tests/workloads/apex.py similarity index 100% rename from ci/long_running_tests/workloads/apex.py rename to release/long_running_tests/workloads/apex.py diff --git a/ci/long_running_tests/workloads/impala.py b/release/long_running_tests/workloads/impala.py similarity index 100% rename from ci/long_running_tests/workloads/impala.py rename to release/long_running_tests/workloads/impala.py diff --git 
a/ci/long_running_tests/workloads/many_actor_tasks.py b/release/long_running_tests/workloads/many_actor_tasks.py similarity index 100% rename from ci/long_running_tests/workloads/many_actor_tasks.py rename to release/long_running_tests/workloads/many_actor_tasks.py diff --git a/ci/long_running_tests/workloads/many_drivers.py b/release/long_running_tests/workloads/many_drivers.py similarity index 100% rename from ci/long_running_tests/workloads/many_drivers.py rename to release/long_running_tests/workloads/many_drivers.py diff --git a/ci/long_running_tests/workloads/many_tasks.py b/release/long_running_tests/workloads/many_tasks.py similarity index 100% rename from ci/long_running_tests/workloads/many_tasks.py rename to release/long_running_tests/workloads/many_tasks.py diff --git a/ci/long_running_tests/workloads/many_tasks_serialized_ids.py b/release/long_running_tests/workloads/many_tasks_serialized_ids.py similarity index 100% rename from ci/long_running_tests/workloads/many_tasks_serialized_ids.py rename to release/long_running_tests/workloads/many_tasks_serialized_ids.py diff --git a/ci/long_running_tests/workloads/node_failures.py b/release/long_running_tests/workloads/node_failures.py similarity index 100% rename from ci/long_running_tests/workloads/node_failures.py rename to release/long_running_tests/workloads/node_failures.py diff --git a/ci/long_running_tests/workloads/pbt.py b/release/long_running_tests/workloads/pbt.py similarity index 100% rename from ci/long_running_tests/workloads/pbt.py rename to release/long_running_tests/workloads/pbt.py diff --git a/ci/long_running_tests/workloads/serve.py b/release/long_running_tests/workloads/serve.py similarity index 100% rename from ci/long_running_tests/workloads/serve.py rename to release/long_running_tests/workloads/serve.py diff --git a/ci/long_running_tests/workloads/serve_failure.py b/release/long_running_tests/workloads/serve_failure.py similarity index 100% rename from 
ci/long_running_tests/workloads/serve_failure.py rename to release/long_running_tests/workloads/serve_failure.py diff --git a/ci/microbenchmark/ray-project/cluster.yaml b/release/microbenchmark/cluster.yaml similarity index 100% rename from ci/microbenchmark/ray-project/cluster.yaml rename to release/microbenchmark/cluster.yaml diff --git a/ci/microbenchmark/run.sh b/release/microbenchmark/run.sh old mode 100644 new mode 100755 similarity index 67% rename from ci/microbenchmark/run.sh rename to release/microbenchmark/run.sh index 690851f5e..7aa2fd09d --- a/ci/microbenchmark/run.sh +++ b/release/microbenchmark/run.sh @@ -21,6 +21,9 @@ case "$i" in --ray-branch=*) ray_branch="${i#*=}" ;; + --workload=*) + workload="${i#*=}" + ;; --help) usage exit @@ -38,15 +41,20 @@ then exit 1 fi + echo "version: $ray_version" echo "commit: $commit" echo "branch: $ray_branch" +echo "workload: $workload" -rm "ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl" || true -wget "https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl" +wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl" + +echo set-window-option -g mouse on > ~/.tmux.conf +echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc pip uninstall -y -q ray -pip install -U "ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl" +pip install --upgrade pip +pip install -U "$wheel" unset RAY_ADDRESS OMP_NUM_THREADS=64 ray microbenchmark diff --git a/ci/rllib_tests/regresssion_tests/ray-project/cluster.yaml b/release/rllib_tests/regression_tests/cluster.yaml similarity index 69% rename from ci/rllib_tests/regresssion_tests/ray-project/cluster.yaml rename to release/rllib_tests/regression_tests/cluster.yaml index 7a4c643c5..d0aa94e8c 100644 --- a/ci/rllib_tests/regresssion_tests/ray-project/cluster.yaml +++ b/release/rllib_tests/regression_tests/cluster.yaml @@ -26,13 +26,7 @@ head_node: # List of shell commands to 
run to set up nodes. -setup_commands: - - wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - conda uninstall -y terminado - - source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - source activate tensorflow_p36 && pip install ray[rllib] ray[debug] - - source activate tensorflow_p36 && pip install torch==1.6 torchvision - - source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0 +setup_commands: [] # Command to start ray on the head node. You don't need to change this. head_start_ray_commands: diff --git a/ci/rllib_tests/regresssion_tests/compact-regression-test.yaml b/release/rllib_tests/regression_tests/compact-regression-test.yaml similarity index 100% rename from ci/rllib_tests/regresssion_tests/compact-regression-test.yaml rename to release/rllib_tests/regression_tests/compact-regression-test.yaml diff --git a/ci/rllib_tests/regresssion_tests/ray-project/requirements.txt b/release/rllib_tests/regression_tests/requirements.txt similarity index 100% rename from ci/rllib_tests/regresssion_tests/ray-project/requirements.txt rename to release/rllib_tests/regression_tests/requirements.txt diff --git a/release/rllib_tests/regression_tests/run.sh b/release/rllib_tests/regression_tests/run.sh new file mode 100755 index 000000000..cd4afe1d0 --- /dev/null +++ b/release/rllib_tests/regression_tests/run.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +ray_version="" +commit="" +ray_branch="" + +for i in "$@" +do +echo "$i" +case "$i" in + --ray-version=*) + ray_version="${i#*=}" + + ;; + --commit=*) + commit="${i#*=}" + ;; + --ray-branch=*) + ray_branch="${i#*=}" + ;; + --workload=*) + ;; + --help) + echo "Usage: run.sh --ray-version=VERSION --commit=SHA --ray-branch=BRANCH [--workload=IGNORED]" + exit + ;; + *) + echo "unknown arg, $i" + exit 1 + ;; +esac +done + +if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]] +then + echo "Provide --ray-version, --commit, and --ray-branch" + exit 1 +fi + 
+echo "version: $ray_version" +echo "commit: $commit" +echo "branch: $ray_branch" +echo "workload: ignored" + +wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" + +conda uninstall -y terminado +source activate tensorflow_p36 && pip install -U pip +source activate tensorflow_p36 && pip install -U "$wheel" +source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]" +source activate tensorflow_p36 && pip install torch==1.6 torchvision +source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0 +source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml diff --git a/ci/rllib_tests/stress_tests/atari_impala_xlarge.yaml b/release/rllib_tests/stress_tests/atari_impala_xlarge.yaml similarity index 100% rename from ci/rllib_tests/stress_tests/atari_impala_xlarge.yaml rename to release/rllib_tests/stress_tests/atari_impala_xlarge.yaml diff --git a/ci/rllib_tests/stress_tests/ray-project/cluster.yaml b/release/rllib_tests/stress_tests/cluster.yaml similarity index 89% rename from ci/rllib_tests/stress_tests/ray-project/cluster.yaml rename to release/rllib_tests/stress_tests/cluster.yaml index 461bc7f58..e4fd2d26d 100644 --- a/ci/rllib_tests/stress_tests/ray-project/cluster.yaml +++ b/release/rllib_tests/stress_tests/cluster.yaml @@ -86,12 +86,7 @@ file_mounts: { } # List of shell commands to run to set up nodes. -setup_commands: - - wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - conda uninstall -y terminado - - source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - source activate tensorflow_p36 && pip install ray[rllib] ray[debug] - - source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0 +setup_commands: [] # Custom commands that will be run on the head node after common setup. 
head_setup_commands: [] diff --git a/ci/rllib_tests/stress_tests/ray-project/requirements.txt b/release/rllib_tests/stress_tests/requirements.txt similarity index 100% rename from ci/rllib_tests/stress_tests/ray-project/requirements.txt rename to release/rllib_tests/stress_tests/requirements.txt diff --git a/release/rllib_tests/stress_tests/run.sh b/release/rllib_tests/stress_tests/run.sh new file mode 100755 index 000000000..1608e32de --- /dev/null +++ b/release/rllib_tests/stress_tests/run.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +ray_version="" +commit="" +ray_branch="" + +for i in "$@" +do +echo "$i" +case "$i" in + --ray-version=*) + ray_version="${i#*=}" + + ;; + --commit=*) + commit="${i#*=}" + ;; + --ray-branch=*) + ray_branch="${i#*=}" + ;; + --workload=*) + ;; + --help) + echo "Usage: run.sh --ray-version=VERSION --commit=SHA --ray-branch=BRANCH [--workload=IGNORED]" + exit + ;; + *) + echo "unknown arg, $i" + exit 1 + ;; +esac +done + +if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]] +then + echo "Provide --ray-version, --commit, and --ray-branch" + exit 1 +fi + +echo "version: $ray_version" +echo "commit: $commit" +echo "branch: $ray_branch" +echo "workload: ignored" + +wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" + +conda uninstall -y terminado +source activate tensorflow_p36 && pip install -U pip +source activate tensorflow_p36 && pip install -U "$wheel" +source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]" +source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0 +source activate tensorflow_p36 + +python3 wait_cluster.py + +rllib train -f atari_impala_xlarge.yaml --ray-address=auto --queue-trials diff --git a/ci/rllib_tests/stress_tests/wait_cluster.py b/release/rllib_tests/stress_tests/wait_cluster.py similarity index 100% rename from ci/rllib_tests/stress_tests/wait_cluster.py rename to release/rllib_tests/stress_tests/wait_cluster.py diff --git a/ci/rllib_tests/unit_gpu_tests/ray-project/cluster.yaml 
b/release/rllib_tests/unit_gpu_tests/cluster.yaml similarity index 80% rename from ci/rllib_tests/unit_gpu_tests/ray-project/cluster.yaml rename to release/rllib_tests/unit_gpu_tests/cluster.yaml index d0fa67149..2030bb2ac 100644 --- a/ci/rllib_tests/unit_gpu_tests/ray-project/cluster.yaml +++ b/release/rllib_tests/unit_gpu_tests/cluster.yaml @@ -26,10 +26,7 @@ head_node: # List of shell commands to run to set up nodes. -setup_commands: - - wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl - - conda uninstall -y terminado - - source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl +setup_commands: [] # Command to start ray on the head node. You don't need to change this. head_start_ray_commands: diff --git a/ci/rllib_tests/unit_gpu_tests/ray-project/requirements.txt b/release/rllib_tests/unit_gpu_tests/requirements.txt similarity index 100% rename from ci/rllib_tests/unit_gpu_tests/ray-project/requirements.txt rename to release/rllib_tests/unit_gpu_tests/requirements.txt diff --git a/release/rllib_tests/unit_gpu_tests/run.sh b/release/rllib_tests/unit_gpu_tests/run.sh new file mode 100755 index 000000000..1d0a554a8 --- /dev/null +++ b/release/rllib_tests/unit_gpu_tests/run.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +ray_version="" +commit="" +ray_branch="" + +for i in "$@" +do +echo "$i" +case "$i" in + --ray-version=*) + ray_version="${i#*=}" + + ;; + --commit=*) + commit="${i#*=}" + ;; + --ray-branch=*) + ray_branch="${i#*=}" + ;; + --workload=*) + ;; + --help) + echo "Usage: run.sh --ray-version=VERSION --commit=SHA --ray-branch=BRANCH [--workload=IGNORED]" + exit + ;; + *) + echo "unknown arg, $i" + exit 1 + ;; +esac +done + +if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]] +then + echo "Provide --ray-version, --commit, and --ray-branch" + exit 1 +fi + +echo "version: $ray_version" +echo "commit: $commit" +echo "branch: $ray_branch" +echo "workload: ignored" + 
+wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" + +conda uninstall -y terminado +source activate tensorflow_p36 && pip install -U pip +source activate tensorflow_p36 && pip install -U "$wheel" + +# Run all test cases, but with a forced num_gpus=1. +# TODO: (sven) chose correct dir and run over all RLlib tests and example scripts! +source activate tensorflow_p36 && export RAY_FORCE_NUM_GPUS=1 && cd ~ && python -m pytest test_attention_net_learning.py diff --git a/ci/regression_test/stress_tests/ray-project/cluster.yaml b/release/stress_tests/cluster.yaml similarity index 99% rename from ci/regression_test/stress_tests/ray-project/cluster.yaml rename to release/stress_tests/cluster.yaml index 81534da83..11ac15b88 100644 --- a/ci/regression_test/stress_tests/ray-project/cluster.yaml +++ b/release/stress_tests/cluster.yaml @@ -85,7 +85,7 @@ file_mounts: { } # List of shell commands to run to set up nodes. -setup_commands: +setup_commands: [] # Uncomment these if you want to build ray from source. 
# - sudo apt-get -qq update # - sudo apt-get install -y build-essential curl unzip diff --git a/ci/long_running_distributed_tests/ray-project/requirements.txt b/release/stress_tests/requirements.txt similarity index 100% rename from ci/long_running_distributed_tests/ray-project/requirements.txt rename to release/stress_tests/requirements.txt diff --git a/release/stress_tests/run.sh b/release/stress_tests/run.sh new file mode 100644 index 000000000..183a89a03 --- /dev/null +++ b/release/stress_tests/run.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +ray_version="" +commit="" +ray_branch="" +workload="" + +for i in "$@" +do +echo "$i" +case "$i" in + --ray-version=*) + ray_version="${i#*=}" + + ;; + --commit=*) + commit="${i#*=}" + ;; + --ray-branch=*) + ray_branch="${i#*=}" + ;; + --workload=*) + workload="${i#*=}" + ;; + --help) + usage + exit + ;; + *) + echo "unknown arg, $i" + exit 1 + ;; +esac +done + +echo "version: $ray_version" +echo "commit: $commit" +echo "branch: $ray_branch" +echo "workload: $workload" + +wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl" + +# Install Anaconda. 
+wget --quiet https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh || true +bash Anaconda3-5.0.1-Linux-x86_64.sh -b -p "$HOME/anaconda3" || true +# shellcheck disable=SC2016 +echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc + +conda uninstall -y terminado +source activate tensorflow_p36 && pip install -U pip +source activate tensorflow_p36 && pip install -U "$wheel" + +pip install -U pip +conda uninstall -y terminado || true +pip install terminado +pip install boto3==1.4.8 cython==0.29.0 +python "workloads/$workload.py" diff --git a/ci/regression_test/stress_tests/test_dead_actors.py b/release/stress_tests/workloads/test_dead_actors.py similarity index 100% rename from ci/regression_test/stress_tests/test_dead_actors.py rename to release/stress_tests/workloads/test_dead_actors.py diff --git a/ci/regression_test/stress_tests/test_many_tasks.py b/release/stress_tests/workloads/test_many_tasks.py similarity index 100% rename from ci/regression_test/stress_tests/test_many_tasks.py rename to release/stress_tests/workloads/test_many_tasks.py diff --git a/ci/regression_test/stress_tests/placement_group/test_placement_group.py b/release/stress_tests/workloads/test_placement_group.py similarity index 100% rename from ci/regression_test/stress_tests/placement_group/test_placement_group.py rename to release/stress_tests/workloads/test_placement_group.py