mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 18:06:25 +08:00
Clean up release tests (#11420)
This commit is contained in:
@@ -1,39 +0,0 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: long-running-distributed-tests
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
params:
|
||||
- name: ray_version # Ray version string.
|
||||
default: "0.8.2"
|
||||
|
||||
- name: commit # Ray commit SHA string.
|
||||
default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"
|
||||
|
||||
- name: ray_branch
|
||||
default: "releases/0.8.2"
|
||||
|
||||
commands:
|
||||
- name: test_workload
|
||||
help: "Start a long running distributed test."
|
||||
command: |
|
||||
python workloads/{{workload}}.py
|
||||
params:
|
||||
- name: workload
|
||||
help: "Name of workload to run."
|
||||
choices:
|
||||
[
|
||||
"pytorch_pbt_failure"
|
||||
]
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: [
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
]
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
workload=""
|
||||
|
||||
usage() {
|
||||
echo "Start one microbenchmark trial."
|
||||
}
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
workload="${i#*=}"
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: $workload"
|
||||
|
||||
python "workloads/$workload.py"
|
||||
@@ -1,54 +0,0 @@
|
||||
name: long-running-tests
|
||||
description: "Ray's long running stress tests"
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
|
||||
commands:
|
||||
- name: test_workload
|
||||
help: "Start a long running stress test."
|
||||
command: |
|
||||
# Install nightly Ray wheels.
|
||||
source activate tensorflow_p36 && pip install -q -U {{wheel}} Click
|
||||
source activate tensorflow_p36 && pip install -q ray[all] gym[atari]
|
||||
source activate tensorflow_p36 && python workloads/{{workload}}.py
|
||||
params:
|
||||
- name: wheel
|
||||
help: "URL to the ray wheel to test (defaults to latest)."
|
||||
default: https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- name: workload
|
||||
help: "Name of the workload to run."
|
||||
choices:
|
||||
[
|
||||
"actor_deaths",
|
||||
"apex",
|
||||
"impala",
|
||||
"many_actor_tasks",
|
||||
"many_drivers",
|
||||
"many_tasks",
|
||||
"node_failures",
|
||||
"pbt",
|
||||
"serve",
|
||||
"serve_failure",
|
||||
"many_tasks_serialized_ids",
|
||||
]
|
||||
config:
|
||||
tmux: true
|
||||
|
||||
- name: check-load
|
||||
command: uptime
|
||||
help: "Check load of the workload."
|
||||
|
||||
- name: show-output
|
||||
command: tmux capture-pane -p
|
||||
help: "Show tail of the workoad output."
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: []
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
@@ -1,39 +0,0 @@
|
||||
name: microbenchmark
|
||||
description: "Ray's microbenchmark"
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
|
||||
commands:
|
||||
- name: run
|
||||
help: "Start one microbenchmark trial."
|
||||
command: |
|
||||
rm ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl || true
|
||||
wget https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
|
||||
pip uninstall -y -q ray
|
||||
pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
|
||||
OMP_NUM_THREADS=64 ray microbenchmark
|
||||
params:
|
||||
- name: ray_version # Ray version string.
|
||||
default: "1.1.0.dev0"
|
||||
|
||||
- name: commit # Ray commit SHA string.
|
||||
default: "FILL ME IN"
|
||||
|
||||
- name: ray_branch
|
||||
default: "master"
|
||||
config:
|
||||
tmux: true
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: [
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
]
|
||||
@@ -1,37 +0,0 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: ray_stress_tests
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
params:
|
||||
- name: ray_version # Ray version string.
|
||||
default: "0.8.2"
|
||||
|
||||
- name: commit # Ray commit SHA string.
|
||||
default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"
|
||||
|
||||
- name: ray_branch
|
||||
default: "releases/0.8.2"
|
||||
|
||||
commands:
|
||||
- name: test_many_tasks
|
||||
help: "Run a stress test that executes many tasks."
|
||||
command: |
|
||||
python test_many_tasks.py
|
||||
|
||||
- name: test_dead_actors
|
||||
help: "Run a stress test that spawns and kills many actors."
|
||||
command: |
|
||||
python test_dead_actors.py
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: [
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
]
|
||||
@@ -1 +0,0 @@
|
||||
ray[debug]
|
||||
@@ -1,45 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
workload=""
|
||||
|
||||
usage() {
|
||||
echo "Start one microbenchmark trial."
|
||||
}
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
workload="${i#*=}"
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: $workload"
|
||||
|
||||
python "$workload.py"
|
||||
@@ -1,57 +0,0 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: rllib_regression_tests
|
||||
|
||||
# description: A short description of the project.
|
||||
# The URL of the repo this project is part of.
|
||||
# repo: ...
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
params:
|
||||
- name: ray_version # Ray version string.
|
||||
default: "0.8.2"
|
||||
|
||||
- name: commit # Ray commit SHA string.
|
||||
default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"
|
||||
|
||||
- name: ray_branch
|
||||
default: "releases/0.8.2"
|
||||
|
||||
|
||||
environment:
|
||||
# dockerfile: The dockerfile to be built and used to run the commands.
|
||||
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
|
||||
requirements: ray-project/requirements.txt
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: check-load
|
||||
command: uptime
|
||||
help: "Check load of the workload."
|
||||
|
||||
- name: check-gpu
|
||||
command: nvidia-smi
|
||||
help: "Check load of the gpu."
|
||||
|
||||
- name: show-output
|
||||
command: tmux capture-pane -p
|
||||
help: "Show tail of the workoad output."
|
||||
|
||||
- name: run-regression-tests
|
||||
command: |
|
||||
source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml
|
||||
help: "Run rllib regression tests"
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: [
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
]
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml
|
||||
@@ -1,52 +0,0 @@
|
||||
# This file is generated by `ray project create`.
|
||||
|
||||
name: rllib_stress_tests
|
||||
|
||||
# description: A short description of the project.
|
||||
# The URL of the repo this project is part of.
|
||||
# repo: ...
|
||||
|
||||
cluster:
|
||||
config: ray-project/cluster.yaml
|
||||
params:
|
||||
- name: ray_version # Ray version string.
|
||||
default: "0.8.2"
|
||||
|
||||
- name: commit # Ray commit SHA string.
|
||||
default: "f5a1307a608fe5fdbdb04616b22c91f029af329a"
|
||||
|
||||
- name: ray_branch
|
||||
default: "releases/0.8.2"
|
||||
|
||||
|
||||
environment:
|
||||
# dockerfile: The dockerfile to be built and used to run the commands.
|
||||
# dockerimage: The docker image to be used to run the project in, e.g. ubuntu:18.04.
|
||||
requirements: ray-project/requirements.txt
|
||||
|
||||
shell: # Shell commands to be ran for environment setup.
|
||||
- echo "Setting up the environment"
|
||||
|
||||
commands:
|
||||
- name: check-load
|
||||
command: uptime
|
||||
help: "Check load of the workload."
|
||||
|
||||
- name: show-output
|
||||
command: tmux capture-pane -p
|
||||
help: "Show tail of the workoad output."
|
||||
|
||||
- name: run-impala
|
||||
command: bash run.sh
|
||||
help: "Run impala stress test"
|
||||
|
||||
# Pathnames for files and directories that should be saved
|
||||
# in a snapshot but that should not be synced with a session. Pathnames can be relative to the project
|
||||
# directory or absolute. Generally, this should be files
|
||||
# that were created by an active session, such as
|
||||
# application checkpoints and logs.
|
||||
output_files: [
|
||||
# For example, uncomment this to save the logs from the
|
||||
# last ray job.
|
||||
# "/tmp/ray/session_latest",
|
||||
]
|
||||
@@ -1,7 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
source activate tensorflow_p36
|
||||
|
||||
python3 wait_cluster.py
|
||||
|
||||
rllib train -f atari_impala_xlarge.yaml --ray-address=auto --queue-trials
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Run all test cases, but with a forced num_gpus=1.
|
||||
# TODO: (sven) choose the correct dir and run over all RLlib tests and example scripts!
|
||||
source activate tensorflow_p36 && export RAY_FORCE_NUM_GPUS=1 && cd ~ && python -m pytest test_attention_net_learning.py
|
||||
@@ -0,0 +1,2 @@
|
||||
export ray_version="1.0.0rc1"
|
||||
export commit=fd5ddb661e659c2b0c968661d96d0405426912e5
|
||||
@@ -0,0 +1,2 @@
|
||||
Running the kickoff script:
|
||||
|
||||
Symlink
+1
@@ -0,0 +1 @@
|
||||
../doc/dev/RELEASE_PROCESS.rst
|
||||
Executable
+29
@@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
|
||||
source "$2"
|
||||
|
||||
ray_version=${ray_version:-}
|
||||
commit=${commit:-}
|
||||
|
||||
if [[ $ray_version == "" || $commit == "" || $1 == "" ]]
|
||||
then
|
||||
echo "Provide --ray-version, --commit, and --ray-branch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "workload: $1"
|
||||
|
||||
DATESTR=$(date +%Y%m%d-%H%M)
|
||||
SESSION="$1-$DATESTR"
|
||||
|
||||
echo "session: $SESSION"
|
||||
|
||||
chmod +x ./run.sh
|
||||
if [ -z "$NO_UP" ]; then
|
||||
anyscale up "$SESSION"
|
||||
fi
|
||||
anyscale push "$SESSION"
|
||||
anyscale exec -n "$SESSION" "./run.sh $1 --ray-version=$ray_version --commit=$commit"
|
||||
+1
-17
@@ -58,23 +58,7 @@ worker_nodes:
|
||||
InstanceMarketOptions:
|
||||
MarketType: spot
|
||||
|
||||
setup_commands:
|
||||
# Install ray.
|
||||
- conda uninstall -y terminado || true
|
||||
- pip install -U pip
|
||||
- pip install terminado
|
||||
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- ray || pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl || true
|
||||
# Installing this without -U to make sure we don't replace the existing Ray installation
|
||||
- pip install ray[rllib]
|
||||
- pip install -U ipdb
|
||||
# There have been some recent problems with torch 1.5 and torchvision 0.6
|
||||
# not recognizing GPUs.
|
||||
# So, we force install torch 1.4 and torchvision 0.5.
|
||||
# https://github.com/pytorch/pytorch/issues/37212#issuecomment-623198624.
|
||||
- pip install torch==1.4.0 torchvision==0.5.0
|
||||
- echo set-window-option -g mouse on > ~/.tmux.conf
|
||||
- echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
|
||||
setup_commands: []
|
||||
|
||||
# Command to start ray on the head node. You don't need to change this.
|
||||
head_start_ray_commands:
|
||||
+61
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
workload=""
|
||||
|
||||
usage() {
|
||||
echo "Start one microbenchmark trial."
|
||||
}
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
workload="${i#*=}"
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: $workload"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
conda uninstall -y terminado || true
|
||||
pip install -U pip
|
||||
pip install terminado
|
||||
pip install -U "$wheel"
|
||||
pip install "ray[rllib]"
|
||||
pip install -U ipdb
|
||||
# There have been some recent problems with torch 1.5 and torchvision 0.6
|
||||
# not recognizing GPUs.
|
||||
# So, we force install torch 1.4 and torchvision 0.5.
|
||||
# https://github.com/pytorch/pytorch/issues/37212#issuecomment-623198624.
|
||||
pip install torch==1.4.0 torchvision==0.5.0
|
||||
echo set-window-option -g mouse on > ~/.tmux.conf
|
||||
echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
|
||||
|
||||
python "workloads/$workload.py"
|
||||
+1
-16
@@ -33,22 +33,7 @@ worker_nodes:
|
||||
MarketType: spot
|
||||
|
||||
# List of shell commands to run to set up nodes.
|
||||
setup_commands:
|
||||
# Install latest TensorFlow
|
||||
- source activate tensorflow_p36 && conda remove -y --force wrapt || true
|
||||
- source activate tensorflow_p36 && pip install --upgrade pip
|
||||
- source activate tensorflow_p36 && pip install -U tensorflow==1.14
|
||||
- echo set-window-option -g mouse on > ~/.tmux.conf
|
||||
- echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
|
||||
# Serve load testing tool
|
||||
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && sudo cp wrk /usr/local/bin'
|
||||
|
||||
# Uncomment the following if you wish to build Ray instead.
|
||||
# - sudo apt-get update
|
||||
# - sudo apt-get install -y build-essential curl unzip
|
||||
# - git clone https://github.com/ray-project/ray || true
|
||||
# - ray/ci/travis/install-bazel.sh
|
||||
# - cd ray/python; git checkout master; git pull; source activate tensorflow_p36 && pip install -e . --verbose
|
||||
setup_commands: []
|
||||
|
||||
# Custom commands that will be run on the head node after common setup.
|
||||
head_setup_commands: []
|
||||
@@ -50,9 +50,17 @@ echo "workload: $workload"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
echo set-window-option -g mouse on > ~/.tmux.conf
|
||||
echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
|
||||
# Serve load testing tool
|
||||
rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && sudo cp wrk /usr/local/bin
|
||||
pip install -U pip
|
||||
unset RAY_ADDRESS
|
||||
source activate tensorflow_p36 && pip install -q -U "$wheel" Click
|
||||
source activate tensorflow_p36 && pip install -q "ray[all]" "gym[atari]"
|
||||
source activate tensorflow_p36 && python "workloads/$workload.py"
|
||||
source activate tensorflow_p36
|
||||
conda remove -y --force wrapt || true
|
||||
pip install --upgrade pip
|
||||
pip install -U tensorflow==1.14
|
||||
pip install -q -U "$wheel" Click
|
||||
pip install -q "ray[all]" "gym[atari]"
|
||||
python "workloads/$workload.py"
|
||||
|
||||
Regular → Executable
+11
-3
@@ -21,6 +21,9 @@ case "$i" in
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
workload="${i#*=}"
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
@@ -38,15 +41,20 @@ then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: $workload"
|
||||
|
||||
rm "ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl" || true
|
||||
wget "https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl"
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl"
|
||||
|
||||
|
||||
echo set-window-option -g mouse on > ~/.tmux.conf
|
||||
echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
|
||||
pip uninstall -y -q ray
|
||||
pip install -U "ray-$ray_version-cp38-cp38-manylinux1_x86_64.whl"
|
||||
pip install --upgrade pip
|
||||
pip install -U "$wheel"
|
||||
|
||||
unset RAY_ADDRESS
|
||||
OMP_NUM_THREADS=64 ray microbenchmark
|
||||
+1
-7
@@ -26,13 +26,7 @@ head_node:
|
||||
|
||||
|
||||
# List of shell commands to run to set up nodes.
|
||||
setup_commands:
|
||||
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- conda uninstall -y terminado
|
||||
- source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- source activate tensorflow_p36 && pip install ray[rllib] ray[debug]
|
||||
- source activate tensorflow_p36 && pip install torch==1.6 torchvision
|
||||
- source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
|
||||
setup_commands: []
|
||||
|
||||
# Command to start ray on the head node. You don't need to change this.
|
||||
head_start_ray_commands:
|
||||
Executable
+52
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]]
|
||||
then
|
||||
echo "Provide --ray-version, --commit, and --ray-branch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: ignored"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
conda uninstall -y terminado
|
||||
source activate tensorflow_p36 && pip install -U pip
|
||||
source activate tensorflow_p36 && pip install -U "$wheel"
|
||||
source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]"
|
||||
source activate tensorflow_p36 && pip install torch==1.6 torchvision
|
||||
source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
|
||||
source activate tensorflow_p36 && rllib train -f compact-regression-test.yaml
|
||||
+1
-6
@@ -86,12 +86,7 @@ file_mounts: {
|
||||
}
|
||||
|
||||
# List of shell commands to run to set up nodes.
|
||||
setup_commands:
|
||||
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- conda uninstall -y terminado
|
||||
- source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- source activate tensorflow_p36 && pip install ray[rllib] ray[debug]
|
||||
- source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
|
||||
setup_commands: []
|
||||
|
||||
# Custom commands that will be run on the head node after common setup.
|
||||
head_setup_commands: []
|
||||
Executable
+56
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]]
|
||||
then
|
||||
echo "Provide --ray-version, --commit, and --ray-branch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: ignored"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
conda uninstall -y terminado
|
||||
source activate tensorflow_p36 && pip install -U pip
|
||||
source activate tensorflow_p36 && pip install -U "$wheel"
|
||||
source activate tensorflow_p36 && pip install "ray[rllib]" "ray[debug]"
|
||||
source activate tensorflow_p36 && pip install boto3==1.4.8 cython==0.29.0
|
||||
source activate tensorflow_p36
|
||||
|
||||
python3 wait_cluster.py
|
||||
|
||||
rllib train -f atari_impala_xlarge.yaml --ray-address=auto --queue-trials
|
||||
+1
-4
@@ -26,10 +26,7 @@ head_node:
|
||||
|
||||
|
||||
# List of shell commands to run to set up nodes.
|
||||
setup_commands:
|
||||
- wget --quiet https://s3-us-west-2.amazonaws.com/ray-wheels/{{ray_branch}}/{{commit}}/ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
- conda uninstall -y terminado
|
||||
- source activate tensorflow_p36 && pip install -U ray-{{ray_version}}-cp36-cp36m-manylinux1_x86_64.whl
|
||||
setup_commands: []
|
||||
|
||||
# Command to start ray on the head node. You don't need to change this.
|
||||
head_start_ray_commands:
|
||||
Executable
+53
@@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ $ray_version == "" || $commit == "" || $ray_branch == "" ]]
|
||||
then
|
||||
echo "Provide --ray-version, --commit, and --ray-branch"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: ignored"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
conda uninstall -y terminado
|
||||
source activate tensorflow_p36 && pip install -U pip
|
||||
source activate tensorflow_p36 && pip install -U "$wheel"
|
||||
|
||||
# Run all test cases, but with a forced num_gpus=1.
|
||||
# TODO: (sven) choose the correct dir and run over all RLlib tests and example scripts!
|
||||
source activate tensorflow_p36 && export RAY_FORCE_NUM_GPUS=1 && cd ~ && python -m pytest test_attention_net_learning.py
|
||||
+1
-1
@@ -85,7 +85,7 @@ file_mounts: {
|
||||
}
|
||||
|
||||
# List of shell commands to run to set up nodes.
|
||||
setup_commands:
|
||||
setup_commands: []
|
||||
# Uncomment these if you want to build ray from source.
|
||||
# - sudo apt-get -qq update
|
||||
# - sudo apt-get install -y build-essential curl unzip
|
||||
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
ray_version=""
|
||||
commit=""
|
||||
ray_branch=""
|
||||
workload=""
|
||||
|
||||
for i in "$@"
|
||||
do
|
||||
echo "$i"
|
||||
case "$i" in
|
||||
--ray-version=*)
|
||||
ray_version="${i#*=}"
|
||||
|
||||
;;
|
||||
--commit=*)
|
||||
commit="${i#*=}"
|
||||
;;
|
||||
--ray-branch=*)
|
||||
ray_branch="${i#*=}"
|
||||
;;
|
||||
--workload=*)
|
||||
workload="${i#*=}"
|
||||
;;
|
||||
--help)
|
||||
usage
|
||||
exit
|
||||
;;
|
||||
*)
|
||||
echo "unknown arg, $i"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "version: $ray_version"
|
||||
echo "commit: $commit"
|
||||
echo "branch: $ray_branch"
|
||||
echo "workload: $workload"
|
||||
|
||||
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux1_x86_64.whl"
|
||||
|
||||
# Install Anaconda.
|
||||
wget --quiet https://repo.continuum.io/archive/Anaconda3-5.0.1-Linux-x86_64.sh || true
|
||||
bash Anaconda3-5.0.1-Linux-x86_64.sh -b -p "$HOME/anaconda3" || true
|
||||
# shellcheck disable=SC2016
|
||||
echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc
|
||||
|
||||
conda uninstall -y terminado
|
||||
source activate tensorflow_p36 && pip install -U pip
|
||||
source activate tensorflow_p36 && pip install -U "$wheel"
|
||||
|
||||
pip install -U pip
|
||||
conda uninstall -y terminado || true
|
||||
pip install terminado
|
||||
pip install boto3==1.4.8 cython==0.29.0
|
||||
python "workloads/$workload.py"
|
||||
Reference in New Issue
Block a user