From b23b6addfcfd4be7a27e58aefbecb3fd7dd6510c Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Thu, 30 Apr 2020 15:48:11 +0200 Subject: [PATCH] [RLlib] Stabilize Pendulum-v0 regression test cases. (#8232) Stabilize Pendulum regression test cases. --- .travis.yml | 21 ++++++++++-- rllib/BUILD | 34 +++++++++++++++---- .../cartpole-a2c-microbatch.yaml | 3 +- ...cartpole-a3c.yaml => cartpole-a3c-tf.yaml} | 3 +- .../regression_tests/cartpole-ddppo.yaml | 3 +- ....yaml => cartpole-dqn-param-noise-tf.yaml} | 0 ...ml => cartpole-dqn-param-noise-torch.yaml} | 0 .../regression_tests/pendulum-ddpg-tf.yaml | 2 +- .../regression_tests/pendulum-ddpg-torch.yaml | 2 +- ...pendulum-ppo.yaml => pendulum-ppo-tf.yaml} | 7 ++-- .../regression_tests/pendulum-ppo-torch.yaml | 21 ++++++++++++ .../regression_tests/pendulum-td3.yaml | 3 +- 12 files changed, 82 insertions(+), 17 deletions(-) rename rllib/tuned_examples/regression_tests/{cartpole-a3c.yaml => cartpole-a3c-tf.yaml} (78%) rename rllib/tuned_examples/regression_tests/{cartpole-dqn-tf-param-noise.yaml => cartpole-dqn-param-noise-tf.yaml} (100%) rename rllib/tuned_examples/regression_tests/{cartpole-dqn-torch-param-noise.yaml => cartpole-dqn-param-noise-torch.yaml} (100%) rename rllib/tuned_examples/regression_tests/{pendulum-ppo.yaml => pendulum-ppo-tf.yaml} (79%) create mode 100644 rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml diff --git a/.travis.yml b/.travis.yml index bed372688..775f18fd4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -141,7 +141,7 @@ matrix: install: - . ./ci/travis/ci.sh build script: - - travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/... + - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/... # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml). # Requested by Edi (MS): Test all learning capabilities with tf1.x @@ -159,7 +159,24 @@ matrix: install: - . ./ci/travis/ci.sh build script: - - travis_wait 120 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests rllib/... + - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_tf rllib/... + + # RLlib: Learning tests with torch (from rllib/tuned_examples/regression_tests/*.yaml). + - os: linux + env: + - RLLIB_TESTING=1 + - RLLIB_REGRESSION_TESTS_TORCH=1 + - TF_VERSION=2.0.0b1 + - TFP_VERSION=0.8 + - TORCH_VERSION=1.4 + - PYTHON=3.6 + - PYTHONWARNINGS=ignore + before_install: + - . ./ci/travis/ci.sh init RAY_CI_RLLIB_FULL_AFFECTED + install: + - . ./ci/travis/ci.sh build + script: + - travis_wait 90 bazel test --config=ci --test_output=streamed --build_tests_only --test_tag_filters=learning_tests_torch rllib/... # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning). # Agent single tests (compilation, loss-funcs, etc..). diff --git a/rllib/BUILD b/rllib/BUILD index 2f142d5a1..e87262f7f 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -41,23 +41,45 @@ # -------------------------------------------------------------------- py_test( - name = "run_regression_tests_cartpole", + name = "run_regression_tests_cartpole_tf", main = "tests/run_regression_tests.py", - tags = ["learning_tests", "learning_tests_cartpole"], + tags = ["learning_tests_tf", "learning_tests_cartpole"], size = "enormous", # = 60min timeout srcs = ["tests/run_regression_tests.py"], - data = glob(["tuned_examples/regression_tests/cartpole-*.yaml"]), + data = glob(["tuned_examples/regression_tests/cartpole-*-tf.yaml"]), # Pass `BAZEL` option and the path to look for yaml regression files. args = ["BAZEL", "tuned_examples/regression_tests"] ) py_test( - name = "run_regression_tests_pendulum", + name = "run_regression_tests_cartpole_torch", main = "tests/run_regression_tests.py", - tags = ["learning_tests", "learning_tests_pendulum"], + tags = ["learning_tests_torch", "learning_tests_cartpole"], size = "enormous", # = 60min timeout srcs = ["tests/run_regression_tests.py"], - data = glob(["tuned_examples/regression_tests/pendulum-*.yaml"]), + data = glob(["tuned_examples/regression_tests/cartpole-*-torch.yaml"]), + # Pass `BAZEL` option and the path to look for yaml regression files. + args = ["BAZEL", "tuned_examples/regression_tests"] +) + +py_test( + name = "run_regression_tests_pendulum_tf", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_tf", "learning_tests_pendulum"], + size = "enormous", # = 60min timeout + srcs = ["tests/run_regression_tests.py"], + data = glob(["tuned_examples/regression_tests/pendulum-*-tf.yaml"]), + # Pass `BAZEL` option and the path to look for yaml regression files. + args = ["BAZEL", "tuned_examples/regression_tests"] +) + +py_test( + name = "run_regression_tests_pendulum_torch", + main = "tests/run_regression_tests.py", + tags = ["learning_tests_torch", "learning_tests_pendulum"], + size = "enormous", # = 60min timeout + srcs = ["tests/run_regression_tests.py"], + data = glob(["tuned_examples/regression_tests/pendulum-*-torch.yaml"]), # Pass `BAZEL` option and the path to look for yaml regression files. args = ["BAZEL", "tuned_examples/regression_tests"] ) diff --git a/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml b/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml index ac1bd74ed..7ddafc01d 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml +++ b/rllib/tuned_examples/regression_tests/cartpole-a2c-microbatch.yaml @@ -1,10 +1,11 @@ -cartpole-a2c-microbatch: +cartpole-a2c-microbatch-tf: env: CartPole-v0 run: A2C stop: episode_reward_mean: 100 timesteps_total: 100000 config: + use_pytorch: false num_workers: 1 gamma: 0.95 microbatch_size: 50 diff --git a/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml b/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml similarity index 78% rename from rllib/tuned_examples/regression_tests/cartpole-a3c.yaml rename to rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml index 08ff2206f..3713db3a9 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml +++ b/rllib/tuned_examples/regression_tests/cartpole-a3c-tf.yaml @@ -1,9 +1,10 @@ -cartpole-a3c: +cartpole-a3c-tf: env: CartPole-v0 run: A3C stop: episode_reward_mean: 100 timesteps_total: 100000 config: + use_pytorch: false num_workers: 1 gamma: 0.95 diff --git a/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml b/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml index 0d9516cbb..5a438a15e 100644 --- a/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml +++ b/rllib/tuned_examples/regression_tests/cartpole-ddppo.yaml @@ -1,8 +1,9 @@ -cartpole-ddppo: +cartpole-ddppo-torch: env: CartPole-v0 run: DDPPO stop: episode_reward_mean: 100 timesteps_total: 100000 config: + use_pytorch: true num_gpus_per_worker: 0 diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-tf-param-noise.yaml b/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml similarity index 100% rename from rllib/tuned_examples/regression_tests/cartpole-dqn-tf-param-noise.yaml rename to rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-tf.yaml diff --git a/rllib/tuned_examples/regression_tests/cartpole-dqn-torch-param-noise.yaml b/rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml similarity index 100% rename from rllib/tuned_examples/regression_tests/cartpole-dqn-torch-param-noise.yaml rename to rllib/tuned_examples/regression_tests/cartpole-dqn-param-noise-torch.yaml diff --git a/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml b/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml index 9e3a1501c..76147e7b7 100644 --- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml +++ b/rllib/tuned_examples/regression_tests/pendulum-ddpg-tf.yaml @@ -2,7 +2,7 @@ pendulum-ddpg-tf: env: Pendulum-v0 run: DDPG stop: - episode_reward_mean: -900 + episode_reward_mean: -700 timesteps_total: 100000 config: use_pytorch: false diff --git a/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml b/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml index 62c62d224..d1ac7a56e 100644 --- a/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml +++ b/rllib/tuned_examples/regression_tests/pendulum-ddpg-torch.yaml @@ -2,7 +2,7 @@ pendulum-ddpg-torch: env: Pendulum-v0 run: DDPG stop: - episode_reward_mean: -900 + episode_reward_mean: -700 timesteps_total: 100000 config: use_pytorch: true diff --git a/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml similarity index 79% rename from rllib/tuned_examples/regression_tests/pendulum-ppo.yaml rename to rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml index b6bfbaf79..ac3afac06 100644 --- a/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml +++ b/rllib/tuned_examples/regression_tests/pendulum-ppo-tf.yaml @@ -1,10 +1,11 @@ -pendulum-ppo: +pendulum-ppo-tf: env: Pendulum-v0 run: PPO stop: - episode_reward_mean: -200 - timesteps_total: 500000 + episode_reward_mean: -500 + timesteps_total: 400000 config: + use_pytorch: false train_batch_size: 2048 vf_clip_param: 10.0 num_workers: 0 diff --git a/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml b/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml new file mode 100644 index 000000000..17132f5ce --- /dev/null +++ b/rllib/tuned_examples/regression_tests/pendulum-ppo-torch.yaml @@ -0,0 +1,21 @@ +pendulum-ppo-torch: + env: Pendulum-v0 + run: PPO + stop: + episode_reward_mean: -500 + timesteps_total: 400000 + config: + use_pytorch: true + train_batch_size: 2048 + vf_clip_param: 10.0 + num_workers: 0 + num_envs_per_worker: 10 + lambda: 0.1 + gamma: 0.95 + lr: 0.0003 + sgd_minibatch_size: 64 + num_sgd_iter: 10 + model: + fcnet_hiddens: [64, 64] + batch_mode: complete_episodes + observation_filter: MeanStdFilter diff --git a/rllib/tuned_examples/regression_tests/pendulum-td3.yaml b/rllib/tuned_examples/regression_tests/pendulum-td3.yaml index 7690e8476..ef4d0fb9b 100644 --- a/rllib/tuned_examples/regression_tests/pendulum-td3.yaml +++ b/rllib/tuned_examples/regression_tests/pendulum-td3.yaml @@ -1,6 +1,7 @@ -pendulum-td3: +pendulum-td3-tf: env: Pendulum-v0 run: TD3 stop: + use_pytorch: false episode_reward_mean: -900 timesteps_total: 100000