Enable direct calls by default (#6367)

* wip

* add

* timeout fix

* const ref

* comments

* fix

* fix

* Move actor state into actor handle

* comments 2

* enable by default

* temp reorder

* some fixes

* add debug code

* tmp

* fix

* wip

* remove dbg

* fix compile

* fix

* fix check

* remove non direct tests

* Increment ref count before resolving value

* rename

* fix another bug

* tmp

* tmp

* Fix object pinning

* build change

* lint

* ActorManager

* tmp

* ActorManager

* fix test component failures

* Remove old code

* Remove unused

* fix

* fix

* fix resources

* fix advanced

* eric's diff

* blacklist

* blacklist

* cleanup

* annotate

* disable tests for now

* remove

* fix

* fix

* clean up verbosity

* fix test

* fix concurrency test

* Update .travis.yml

* Update .travis.yml

* Update .travis.yml

* split up analysis suite

* split up trial runner suite

* fix detached direct actors

* fix

* split up advanced test

* lint

* fix core worker test hang

* fix bad check fail which breaks test_cluster.py in tune

* fix some minor diffs in test_cluster

* less workers

* make less stressful

* split up test

* retry flaky tests

* remove old test flags

* fixes

* lint

* Update worker_pool.cc

* fix race

* fix

* fix bugs in node failure handling

* fix race condition

* fix bugs in node failure handling

* fix race condition

* nits

* fix test

* disable heartbeats

* disable heartbeats

* fix

* fix

* use worker id

* fix max fail

* debug exit

* fix merge, and apply [PATCH] fix concurrency test

* [patch] fix core worker test hang

* remove NotifyActorCreation, and return worker on completion of actor creation task

* remove actor died callback

* Update core_worker.cc

* lint

* use task manager

* fix merge

* fix deadlock

* wip

* merge conflicts

* fix

* better sysexit handling

* better sysexit handling

* better sysexit handling

* check id

* better debug

* task failed msg

* task failed msg

* retry failed tasks with delay

* retry failed tasks with delay

* clip deps

* fix

* fix core worker tests

* fix task manager test

* fix all tests

* cleanup

* set to 0 for direct tests

* dont check worker id for ownership rpc

* dont check worker id for ownership rpc

* debug messages

* add comment

* remove debug statements

* nit

* check worker id

* fix test

* owner

* fix tests
This commit is contained in:
Eric Liang
2019-12-13 13:58:04 -08:00
committed by GitHub
parent 3754effafc
commit be5dd8eb5e
16 changed files with 174 additions and 362 deletions
+4 -4
View File
@@ -147,13 +147,13 @@ script:
- ./ci/suppress_output bazel test --build_tests_only --show_progress_rate_limit=100 --test_output=errors //:all
# ray serve tests
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || ./ci/suppress_output python python/ray/experimental/serve/examples/echo_full.py; fi
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || RAY_FORCE_DIRECT=0 python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/serve/tests; fi
- if [ $RAY_CI_SERVE_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || RAY_FORCE_DIRECT=0 ./ci/suppress_output python python/ray/experimental/serve/examples/echo_full.py; fi
# ray tests
# Python3.5+ only. Otherwise we will get `SyntaxError` regardless of how we set the tester.
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || python -m pytest -v --durations=5 --timeout=300 python/ray/tests/py3_test.py; fi
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || RAY_FORCE_DIRECT=0 python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py; fi
- if [ $RAY_CI_PYTHON_AFFECTED == "1" ]; then python -c 'import sys;exit(sys.version_info>=(3,5))' || RAY_FORCE_DIRECT=0 python -m pytest -v --durations=5 --timeout=300 python/ray/tests/py3_test.py; fi
# py bazel tests, run using local strategy since PY2 breaks with sandbox
- ./ci/keep_alive bazel test --spawn_strategy=local --flaky_test_attempts=3 --python_version=$BAZEL_PYTHON_VERSION --nocache_test_results --test_verbose_timeout_warnings --incompatible_allow_python_version_transitions=false --incompatible_py3_is_default=false --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors --test_tag_filters=-jenkins_only python/ray/...
+104 -104
View File
@@ -1,270 +1,270 @@
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_catalog.py
/ray/ci/suppress_output python /ray/rllib/tests/test_catalog.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_optimizers.py
/ray/ci/suppress_output python /ray/rllib/tests/test_optimizers.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_filters.py
/ray/ci/suppress_output python /ray/rllib/tests/test_filters.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_evaluators.py
/ray/ci/suppress_output python /ray/rllib/tests/test_evaluators.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_eager_support.py
/ray/ci/suppress_output python /ray/rllib/tests/test_eager_support.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env PongDeterministic-v0 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pong-ram-v4 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env PongDeterministic-v0 \
--run A2C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"free_log_std": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"remote_worker_envs": true, "remote_env_batch_wait_ms": 99999999, "num_envs_per_worker": 2, "num_workers": 1, "train_batch_size": 100, "sgd_minibatch_size": 50}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run PPO \
--stop '{"training_iteration": 2}' \
--config '{"remote_worker_envs": true, "num_envs_per_worker": 2, "num_workers": 1, "train_batch_size": 100, "sgd_minibatch_size": 50}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run APPO \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "num_gpus": 0}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run ES \
--stop '{"training_iteration": 1}' \
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pong-v0 \
--run ES \
--stop '{"training_iteration": 1}' \
--config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run A3C \
--stop '{"training_iteration": 1}' \
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 1}' \
--config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run APEX \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "timesteps_per_iteration": 1000, "num_gpus": 0, "min_iter_time_s": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env FrozenLake-v0 \
--run DQN \
--stop '{"training_iteration": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env FrozenLake-v0 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"num_sgd_iter": 10, "sgd_minibatch_size": 64, "train_batch_size": 1000, "num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env PongDeterministic-v4 \
--run DQN \
--stop '{"training_iteration": 1}' \
--config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env MontezumaRevenge-v0 \
--run PPO \
--stop '{"training_iteration": 1}' \
--config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "model": {"use_lstm": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "use_pytorch": true}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pong-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env FrozenLake-v0 \
--run PG \
--stop '{"training_iteration": 1}' \
--config '{"sample_batch_size": 500, "num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run DDPG \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "num_aggregation_workers": 2, "min_iter_time_s": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "model": {"use_lstm": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run IMPALA \
--stop '{"training_iteration": 1}' \
--config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0, "model": {"use_lstm": true}}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env MountainCarContinuous-v0 \
--run DDPG \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env MountainCarContinuous-v0 \
--run DDPG \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run APEX_DDPG \
--ray-num-cpus 8 \
@@ -272,7 +272,7 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
--config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run APEX_DDPG \
--ray-num-cpus 8 \
@@ -280,199 +280,199 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
--config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1, "batch_mode": "complete_episodes", "parameter_noise": false}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run MARWIL \
--stop '{"training_iteration": 1}' \
--config '{"input": "/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "shuffle_buffer_size": 10}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v0 \
--run DQN \
--stop '{"training_iteration": 1}' \
--config '{"input": "/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "soft_q": true}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_local.py
/ray/ci/suppress_output python /ray/rllib/tests/test_local.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_reproducibility.py
/ray/ci/suppress_output python /ray/rllib/tests/test_reproducibility.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_dependency.py
/ray/ci/suppress_output python /ray/rllib/tests/test_dependency.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_legacy.py
/ray/ci/suppress_output python /ray/rllib/tests/test_legacy.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_io.py
/ray/ci/suppress_output python /ray/rllib/tests/test_io.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_checkpoint_restore.py
/ray/ci/suppress_output python /ray/rllib/tests/test_checkpoint_restore.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_rollout_worker.py
/ray/ci/suppress_output python /ray/rllib/tests/test_rollout_worker.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_nested_spaces.py
/ray/ci/suppress_output python /ray/rllib/tests/test_nested_spaces.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_external_env.py
/ray/ci/suppress_output python /ray/rllib/tests/test_external_env.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_external_multi_agent_env.py
/ray/ci/suppress_output python /ray/rllib/tests/test_external_multi_agent_env.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_keras_model.py --run=A2C --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=A2C --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_keras_model.py --run=PPO --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=PPO --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/parametric_action_cartpole.py --run=PPO --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PPO --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/parametric_action_cartpole.py --run=DQN --stop=50
/ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=DQN --stop=50
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_lstm.py
/ray/ci/suppress_output python /ray/rllib/tests/test_lstm.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PPO
/ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PPO
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PG
/ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PG
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DQN
/ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DQN
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DDPG
/ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DDPG
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_multi_agent_env.py
/ray/ci/suppress_output python /ray/rllib/tests/test_multi_agent_env.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_supported_spaces.py
/ray/ci/suppress_output python /ray/rllib/tests/test_supported_spaces.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_env_with_subprocess.py
/ray/ci/suppress_output python /ray/rllib/tests/test_env_with_subprocess.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/tests/test_rollout.sh
/ray/ci/suppress_output /ray/rllib/tests/test_rollout.sh
# Run all single-agent regression tests (3x retry each)
for yaml in $(ls $ROOT_DIR/../../rllib/tuned_examples/regression_tests); do
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/run_regression_tests.py \
/ray/ci/suppress_output python /ray/rllib/tests/run_regression_tests.py \
/ray/rllib/tuned_examples/regression_tests/$yaml
done
# Try a couple times since it's stochastic
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/multiagent_pendulum.py || \
/ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py || \
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/multiagent_pendulum.py || \
/ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py || \
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/multiagent_pendulum.py
/ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2
/ray/ci/suppress_output python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/multiagent_two_trainers.py --num-iters=2
/ray/ci/suppress_output python /ray/rllib/examples/multiagent_two_trainers.py --num-iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_avail_actions_qmix.py
/ray/ci/suppress_output python /ray/rllib/tests/test_avail_actions_qmix.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/cartpole_lstm.py --run=PPO --stop=200
/ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --run=PPO --stop=200
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/cartpole_lstm.py --run=IMPALA --stop=100
/ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --run=IMPALA --stop=100
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/cartpole_lstm.py --stop=200 --use-prev-action-reward
/ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --stop=200 --use-prev-action-reward
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_loss.py --iters=2
/ray/ci/suppress_output python /ray/rllib/examples/custom_loss.py --iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/rollout_worker_custom_workflow.py
/ray/ci/suppress_output python /ray/rllib/examples/rollout_worker_custom_workflow.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/eager_execution.py --iters=2
/ray/ci/suppress_output python /ray/rllib/examples/eager_execution.py --iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_tf_policy.py --iters=2
/ray/ci/suppress_output python /ray/rllib/examples/custom_tf_policy.py --iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_torch_policy.py --iters=2
/ray/ci/suppress_output python /ray/rllib/examples/custom_torch_policy.py --iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/rollout_worker_custom_workflow.py
/ray/ci/suppress_output python /ray/rllib/examples/rollout_worker_custom_workflow.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_metrics_and_callbacks.py --num-iters=2
/ray/ci/suppress_output python /ray/rllib/examples/custom_metrics_and_callbacks.py --num-iters=2
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/contrib/random_agent/random_agent.py
/ray/ci/suppress_output python /ray/rllib/contrib/random_agent/random_agent.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/contrib/alpha_zero/examples/train_cartpole.py --training-iteration=1
/ray/ci/suppress_output python /ray/rllib/contrib/alpha_zero/examples/train_cartpole.py --training-iteration=1
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/centralized_critic.py --stop=2000
/ray/ci/suppress_output python /ray/rllib/examples/centralized_critic.py --stop=2000
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/centralized_critic_2.py --stop=2000
/ray/ci/suppress_output python /ray/rllib/examples/centralized_critic_2.py --stop=2000
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/twostep_game.py --stop=2000 --run=contrib/MADDPG
/ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=contrib/MADDPG
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/twostep_game.py --stop=2000 --run=PG
/ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=PG
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/twostep_game.py --stop=2000 --run=QMIX
/ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=QMIX
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/twostep_game.py --stop=2000 --run=APEX_QMIX
/ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=APEX_QMIX
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/autoregressive_action_dist.py --stop=150
/ray/ci/suppress_output python /ray/rllib/examples/autoregressive_action_dist.py --stop=150
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env PongDeterministic-v4 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "use_pytorch": true, "sample_async": false, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84}, "preprocessor_pref": "rllib"}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env CartPole-v1 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env Pendulum-v0 \
--run A3C \
--stop '{"training_iteration": 1}' \
--config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct /ray/rllib/train.py \
/ray/ci/suppress_output /ray/rllib/train.py \
--env PongDeterministic-v4 \
--run IMPALA \
--stop='{"timesteps_total": 40000}' \
@@ -480,13 +480,13 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
--config '{"num_workers": 1, "num_gpus": 0, "num_envs_per_worker": 32, "sample_batch_size": 50, "train_batch_size": 50, "learner_queue_size": 1}'
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/agents/impala/vtrace_test.py
/ray/ci/suppress_output python /ray/rllib/agents/impala/vtrace_test.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/tests/test_ignore_worker_failure.py
/ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/ci/suppress_output --force-direct python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
+42 -42
View File
@@ -31,147 +31,147 @@ fi
echo "Using Docker image" $DOCKER_SHA
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
pytest /ray/python/ray/tune/tests/test_cluster.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
pytest /ray/python/ray/tune/tests/test_actor_reuse.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
pytest /ray/python/ray/tune/tests/test_tune_restore.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/tests/example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
bash -c 'pip install -U tensorflow && python /ray/python/ray/tune/tests/test_logger.py'
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
bash -c 'pip install -U tensorflow==1.15 && python /ray/python/ray/tune/tests/test_logger.py'
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
bash -c 'pip install -U tensorflow==1.14 && python /ray/python/ray/tune/tests/test_logger.py'
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
bash -c 'pip install -U tensorflow==1.12 && python /ray/python/ray/tune/tests/test_logger.py'
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e MPLBACKEND=Agg $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e MPLBACKEND=Agg $DOCKER_SHA \
python /ray/python/ray/tune/tests/tutorial.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/pbt_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/hyperband_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/async_hyperband_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tf_mnist_example.py --smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/lightgbm_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/xgboost_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/logging_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/mlflow_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/bayesopt_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/hyperopt_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e SIGOPT_KEY $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} -e SIGOPT_KEY $DOCKER_SHA \
python /ray/python/ray/tune/examples/sigopt_example.py \
--smoke-test
# Runs only on Python3
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/nevergrad_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/tune_mnist_keras.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/mnist_pytorch.py --smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/mnist_pytorch_trainable.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/genetic_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/skopt_example.py \
--smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/tune/examples/pbt_memnn_example.py \
--smoke-test
# uncomment once statsmodels is updated.
# $SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
# $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
# python /ray/python/ray/tune/examples/bohb_example.py \
# --smoke-test
######################## SGD TESTS #################################
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python -m pytest /ray/python/ray/experimental/sgd/tests
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/train_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/train_example.py --num-replicas=2
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/tune_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/tune_example.py --num-replicas=2
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_pytorch_example.py --smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_pytorch_example.py --smoke-test --num-replicas=2
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_pytorch_example.py --smoke-test --tune
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/tensorflow_train_example.py
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/tensorflow_train_example.py --num-replicas=2
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/tensorflow_train_example.py --tune
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_tf_example.py --smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_tf_example.py --num-replicas 2 --smoke-test
$SUPPRESS_OUTPUT --force-direct docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/experimental/sgd/examples/cifar_tf_example.py --num-replicas 2 --smoke-test --augment-data
-6
View File
@@ -19,12 +19,6 @@ watchdog() {
watchdog & 2>/dev/null
WATCHDOG_PID=$!
if [ "$1" == "--force-direct" ]; then
echo "Setting RAY_FORCE_DIRECT=1"
export RAY_FORCE_DIRECT=1
shift
fi
time "$@" >$TMPFILE 2>&1
CODE=$?
+1 -1
View File
@@ -17,7 +17,7 @@ def env_integer(key, default):
def direct_call_enabled():
return bool(int(os.environ.get("RAY_FORCE_DIRECT", "0")))
return bool(int(os.environ.get("RAY_FORCE_DIRECT", "1")))
ID_SIZE = 20
+11 -36
View File
@@ -151,19 +151,22 @@ def main():
timeit("1:1 actor calls async", actor_async, 1000)
a = Actor.options(is_direct_call=True).remote()
a = Actor.options(max_concurrency=16).remote()
def actor_concurrent():
ray.get([a.small_value.remote() for _ in range(1000)])
timeit("1:1 direct actor calls async", actor_concurrent, 1000)
timeit("1:1 actor calls concurrent", actor_concurrent, 1000)
a = Actor.options(is_direct_call=True, max_concurrency=16).remote()
n = 5000
n_cpu = multiprocessing.cpu_count() // 2
actors = [Actor._remote() for _ in range(n_cpu)]
client = Client.remote(actors)
def actor_concurrent():
ray.get([a.small_value.remote() for _ in range(1000)])
def actor_async_direct():
ray.get(client.small_value_batch.remote(n))
timeit("1:1 direct actor calls concurrent", actor_concurrent, 1000)
timeit("1:n actor calls async", actor_async_direct, n * len(actors))
n_cpu = multiprocessing.cpu_count() // 2
a = [Actor.remote() for _ in range(n_cpu)]
@@ -177,44 +180,16 @@ def main():
timeit("n:n actor calls async", actor_multi2, m * n)
n = 5000
n_cpu = multiprocessing.cpu_count() // 2
actors = [Actor._remote(is_direct_call=True) for _ in range(n_cpu)]
client = Client.remote(actors)
def actor_async_direct():
ray.get(client.small_value_batch.remote(n))
timeit("1:n direct actor calls async", actor_async_direct, n * len(actors))
clients = [Client.remote(a) for a in actors]
def actor_multi2_direct():
ray.get([c.small_value_batch.remote(n) for c in clients])
timeit("n:n direct actor calls async", actor_multi2_direct,
n * len(clients))
n = 1000
actors = [Actor._remote(is_direct_call=True) for _ in range(n_cpu)]
actors = [Actor._remote() for _ in range(n_cpu)]
clients = [Client.remote(a) for a in actors]
def actor_multi2_direct_arg():
ray.get([c.small_value_batch_arg.remote(n) for c in clients])
timeit("n:n direct actor calls with arg async", actor_multi2_direct_arg,
timeit("n:n actor calls with arg async", actor_multi2_direct_arg,
n * len(clients))
n = 1000
actors = [Actor._remote(is_direct_call=True) for _ in range(n_cpu)]
clients = [Client.remote(a) for a in actors]
def actor_multi2_direct_arg():
ray.get([c.small_value_batch_arg.remote(n) for c in clients])
timeit("multi client direct actor calls with arg async",
actor_multi2_direct_arg, n * len(clients))
if __name__ == "__main__":
main()
+6 -51
View File
@@ -6,14 +6,6 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_actor_direct",
size = "medium",
srcs = ["test_actor_direct.py", "test_actor.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_actor_resources",
size = "medium",
@@ -22,19 +14,12 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_actor_resources_direct",
size = "medium",
srcs = ["test_actor_resources_direct.py", "test_actor_resources.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_actor_failures",
size = "medium",
srcs = ["test_actor_failures.py"],
tags = ["exclusive"],
# TODO(ekl) enable this once we support actor reconstruction again
tags = ["exclusive", "manual"],
deps = ["//:ray_lib"],
)
@@ -46,14 +31,6 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_basic_direct",
size = "medium",
srcs = ["test_basic_direct.py", "test_basic.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_advanced",
size = "medium",
@@ -126,14 +103,6 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_stress_direct",
size = "medium",
srcs = ["test_stress_direct.py", "test_stress.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_stress_sharded",
size = "medium",
@@ -142,19 +111,12 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_stress_sharded_direct",
size = "medium",
srcs = ["test_stress_sharded_direct.py", "test_stress_sharded.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_stress_failure",
size = "large",
srcs = ["test_stress_failure.py"],
tags = ["exclusive"],
# TODO(ekl) enable again once we support direct call reconstruction
tags = ["exclusive", "manual"],
deps = ["//:ray_lib"],
)
@@ -209,14 +171,6 @@ py_test(
deps = ["//:ray_lib"],
)
py_test(
name = "test_failure_direct",
size = "medium",
srcs = ["test_failure_direct.py", "test_failure.py"],
tags = ["exclusive"],
deps = ["//:ray_lib"],
)
py_test(
name = "test_garbage_collection",
size = "small",
@@ -276,7 +230,8 @@ py_test(
name = "test_monitors",
size = "small",
srcs = ["test_monitors.py"],
tags = ["exclusive"],
# TODO(ekl) tasks() and objects() are different in direct call mode.
tags = ["exclusive", "manual"],
deps = ["//:ray_lib"],
)
-16
View File
@@ -1,16 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main(
["-v",
os.path.join(os.path.dirname(__file__), "test_actor.py")]))
@@ -1,17 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main([
"-v",
os.path.join(os.path.dirname(__file__), "test_actor_resources.py")
]))
-16
View File
@@ -1,16 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main(
["-v",
os.path.join(os.path.dirname(__file__), "test_basic.py")]))
-16
View File
@@ -1,16 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main(
["-v",
os.path.join(os.path.dirname(__file__), "test_failure.py")]))
@@ -1,18 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main([
"-v",
os.path.join(
os.path.dirname(__file__), "test_multinode_failures.py")
]))
-16
View File
@@ -1,16 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main(
["-v",
os.path.join(os.path.dirname(__file__), "test_stress.py")]))
@@ -1,17 +0,0 @@
"""Wrapper script that sets RAY_FORCE_DIRECT."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pytest
import sys
import os
if __name__ == "__main__":
os.environ["RAY_FORCE_DIRECT"] = "1"
sys.exit(
pytest.main([
"-v",
os.path.join(os.path.dirname(__file__), "test_stress_sharded.py")
]))
+3 -1
View File
@@ -14,6 +14,7 @@ import types
import ray.cloudpickle as cloudpickle
from ray.tune import TuneError
from ray.tune.progress_reporter import trial_progress_str
from ray.tune.ray_trial_executor import RayTrialExecutor
from ray.tune.result import (TIME_THIS_ITER_S, RESULT_DUPLICATE,
SHOULD_CHECKPOINT)
@@ -380,7 +381,8 @@ class TrialRunner(object):
def debug_string(self, delim="\n"):
messages = [
self._scheduler_alg.debug_string(),
self.trial_executor.debug_string()
self.trial_executor.debug_string(),
trial_progress_str(self.get_trials()),
]
return delim.join(messages)
+3 -1
View File
@@ -73,4 +73,6 @@ class TestEagerSupport(unittest.TestCase):
if __name__ == "__main__":
unittest.main(verbosity=2)
import pytest
import sys
sys.exit(pytest.main(["-v", __file__]))