diff --git a/.travis.yml b/.travis.yml index 556901ae7..23b6f46b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -192,7 +192,7 @@ matrix: - ./ci/suppress_output ./ci/travis/install-ray.sh script: - if [ $RAY_CI_RLLIB_AFFECTED != "1" ]; then exit; fi - - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/... + - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/... # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/regression_tests/*.yaml). # Requested by Edi (MS): Test all learning capabilities with tf1.x @@ -213,7 +213,7 @@ matrix: - ./ci/suppress_output ./ci/travis/install-ray.sh script: - if [ $RAY_CI_RLLIB_FULL_AFFECTED != "1" ]; then exit; fi - - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=streamed rllib/... + - travis_wait 60 bazel test --build_tests_only --test_tag_filters=learning_tests --spawn_strategy=local --flaky_test_attempts=3 --nocache_test_results --test_verbose_timeout_warnings --progress_report_interval=100 --show_progress_rate_limit=100 --show_timestamps --test_output=errors rllib/... # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning). # Agent single tests (compilation, loss-funcs, etc..). diff --git a/rllib/BUILD b/rllib/BUILD index af1ccbcba..f858baba9 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -41,12 +41,23 @@ # -------------------------------------------------------------------- py_test( - name = "run_regression_tests", + name = "run_regression_tests_cartpole", main = "tests/run_regression_tests.py", - tags = ["learning_tests"], + tags = ["learning_tests", "learning_tests_cartpole"], size = "enormous", # = 60min timeout srcs = ["tests/run_regression_tests.py"], - data = glob(["tuned_examples/regression_tests/*.yaml"]), + data = glob(["tuned_examples/regression_tests/cartpole*.yaml"]), + # Pass `BAZEL` option and the path to look for yaml regression files. + args = ["BAZEL", "tuned_examples/regression_tests"] +) + +py_test( + name = "run_regression_tests_pendulum", + main = "tests/run_regression_tests.py", + tags = ["learning_tests", "learning_tests_pendulum"], + size = "enormous", # = 60min timeout + srcs = ["tests/run_regression_tests.py"], + data = glob(["tuned_examples/regression_tests/pendulum*.yaml"]), # Pass `BAZEL` option and the path to look for yaml regression files. args = ["BAZEL", "tuned_examples/regression_tests"] ) diff --git a/rllib/agents/dqn/tests/test_dqn.py b/rllib/agents/dqn/tests/test_dqn.py index f1367ebae..e36a19da3 100644 --- a/rllib/agents/dqn/tests/test_dqn.py +++ b/rllib/agents/dqn/tests/test_dqn.py @@ -97,37 +97,17 @@ class TestDQN(unittest.TestCase): actions.append(trainer.compute_action(obs)) check(np.std(actions), 0.0, false=True) - if eager_mode_ctx: - eager_mode_ctx.__exit__(None, None, None) - def test_dqn_parameter_noise_exploration(self): """Tests, whether a DQN Agent works with ParameterNoise.""" obs = np.array(0) + core_config = dqn.DEFAULT_CONFIG.copy() + core_config["num_workers"] = 0 # Run locally. + core_config["env_config"] = {"is_slippery": False, "map_name": "4x4"} - for fw in ["eager", "tf", "torch"]: - if fw == "torch": - continue - print("framework={}".format(fw)) - - core_config = dqn.DEFAULT_CONFIG.copy() - core_config["num_workers"] = 0 # Run locally. - core_config["env_config"] = { - "is_slippery": False, - "map_name": "4x4" - } - core_config["eager"] = fw == "eager" - core_config["use_pytorch"] = fw == "torch" + for fw in framework_iterator(core_config, ["tf", "eager"]): config = core_config.copy() - eager_mode_ctx = None - if fw == "tf": - assert not tf.executing_eagerly() - elif fw == "eager": - eager_mode_ctx = eager_mode() - eager_mode_ctx.__enter__() - assert tf.executing_eagerly() - # DQN with ParameterNoise exploration (config["explore"]=True). # ---- config["exploration_config"] = {"type": "ParameterNoise"} @@ -258,9 +238,6 @@ class TestDQN(unittest.TestCase): a = trainer.compute_action(obs, explore=True) check(a, a_) - if eager_mode_ctx: - eager_mode_ctx.__exit__(None, None, None) - def _get_current_noise(self, policy, fw): # If noise not even created yet, return 0.0. if policy.exploration.noise is None: diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py index 48208b35f..26eaeffc3 100644 --- a/rllib/tests/run_regression_tests.py +++ b/rllib/tests/run_regression_tests.py @@ -51,7 +51,7 @@ if __name__ == "__main__": passed = False for i in range(3): - trials = run_experiments(experiments, resume=False) + trials = run_experiments(experiments, resume=False, verbose=1) for t in trials: if (t.last_result["episode_reward_mean"] >= diff --git a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml b/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml deleted file mode 100644 index 386612557..000000000 --- a/rllib/tuned_examples/regression_tests/cartpole-ppo-tf-multi-gpu.yaml +++ /dev/null @@ -1,20 +0,0 @@ -cartpole-ppo-tf-multi-gpu: - env: CartPole-v0 - run: PPO - stop: - episode_reward_mean: 150 - timesteps_total: 100000 - config: - gamma: 0.99 - lr: 0.0003 - num_workers: 1 - observation_filter: MeanStdFilter - num_sgd_iter: 6 - vf_share_layers: true - vf_loss_coeff: 0.01 - model: - fcnet_hiddens: [32] - fcnet_activation: linear - # Use fake-GPU setup to prove towers are working and learning. - num_gpus: 6 - _fake_gpus: true diff --git a/rllib/utils/exploration/tests/test_explorations.py b/rllib/utils/exploration/tests/test_explorations.py index bf1b0626d..fafef3b93 100644 --- a/rllib/utils/exploration/tests/test_explorations.py +++ b/rllib/utils/exploration/tests/test_explorations.py @@ -68,7 +68,7 @@ def do_test_explorations(run, # Make sure actions drawn are different # (around some mean value), given constant observations. actions = [] - for _ in range(50): + for _ in range(100): actions.append( trainer.compute_action( observation=dummy_obs,