diff --git a/.travis.yml b/.travis.yml index 9fadfc59c..e8c95fcc1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,25 +6,25 @@ matrix: include: - os: linux dist: trusty - env: PYTHON=2.7 + env: PYTHON=2.7 PYTHONWARNINGS=ignore - os: linux dist: trusty - env: PYTHON=3.5 + env: PYTHON=3.5 PYTHONWARNINGS=ignore - os: osx osx_image: xcode7 - env: PYTHON=2.7 + env: PYTHON=2.7 PYTHONWARNINGS=ignore - os: osx osx_image: xcode7 - env: PYTHON=3.5 + env: PYTHON=3.5 PYTHONWARNINGS=ignore - os: linux dist: trusty env: - JDK='Oracle JDK 8' - - PYTHON=3.5 + - PYTHON=3.5 PYTHONWARNINGS=ignore install: - ./.travis/install-dependencies.sh - export PATH="$HOME/miniconda/bin:$PATH" @@ -33,7 +33,7 @@ matrix: - os: linux dist: trusty - env: LINT=1 + env: LINT=1 PYTHONWARNINGS=ignore before_install: # In case we ever want to use a different version of clang-format: #- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - @@ -58,7 +58,7 @@ matrix: - os: linux dist: trusty - env: VALGRIND=1 PYTHON=2.7 + env: VALGRIND=1 PYTHON=2.7 PYTHONWARNINGS=ignore before_install: - sudo apt-get update -qq - sudo apt-get install -qq valgrind @@ -75,7 +75,7 @@ matrix: # Build Linux wheels. - os: linux dist: trusty - env: LINUX_WHEELS=1 + env: LINUX_WHEELS=1 PYTHONWARNINGS=ignore install: - ./.travis/install-dependencies.sh # This command should be kept in sync with ray/python/README-building-wheels.md. @@ -86,7 +86,7 @@ matrix: # Build MacOS wheels. - os: osx osx_image: xcode7 - env: MAC_WHEELS=1 + env: MAC_WHEELS=1 PYTHONWARNINGS=ignore install: - ./.travis/install-dependencies.sh # This command should be kept in sync with ray/python/README-building-wheels.md. @@ -100,6 +100,7 @@ matrix: env: - PYTHON=3.5 - RAY_USE_NEW_GCS=on + - PYTHONWARNINGS=ignore install: diff --git a/python/ray/rllib/agents/ars/ars.py b/python/ray/rllib/agents/ars/ars.py index 1b39a79d0..aafcee7f4 100644 --- a/python/ray/rllib/agents/ars/ars.py +++ b/python/ray/rllib/agents/ars/ars.py @@ -305,7 +305,7 @@ class ARSAgent(Agent): num_episodes, num_timesteps = 0, 0 results = [] while num_episodes < min_episodes: - logger.info( + logger.debug( "Collected {} episodes {} timesteps so far this iter".format( num_episodes, num_timesteps)) rollout_ids = [ diff --git a/python/ray/rllib/test/run_regression_tests.py b/python/ray/rllib/test/run_regression_tests.py new file mode 100644 index 000000000..a542924bd --- /dev/null +++ b/python/ray/rllib/test/run_regression_tests.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# Runs one or more regression tests. Retries tests up to 3 times. +# +# Example usage: +# ./run_regression_tests.sh regression-tests/cartpole-es.yaml + +import yaml +import sys + +import ray +from ray.tune import run_experiments + +if __name__ == '__main__': + + ray.init() + + for test in sys.argv[1:]: + experiments = yaml.load(open(test).read()) + + print("== Test config ==") + print(yaml.dump(experiments)) + + for i in range(3): + trials = run_experiments(experiments) + + num_failures = 0 + for t in trials: + if (t.last_result["episode_reward_mean"] < + t.stopping_criterion["episode_reward_mean"]): + num_failures += 1 + + if not num_failures: + print("Regression test PASSED") + sys.exit(0) + + print("Regression test flaked, retry", i) + + print("Regression test FAILED") + sys.exit(1) diff --git a/python/ray/rllib/tuned_examples/generate_regression_tests.py b/python/ray/rllib/tuned_examples/generate_regression_tests.py deleted file mode 100755 index 3196bd4d0..000000000 --- a/python/ray/rllib/tuned_examples/generate_regression_tests.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python -# This script generates all the regression tests for RLlib. - -import glob -import re -import os -import os.path as osp - -CONFIG_DIR = osp.join(osp.dirname(osp.abspath(__file__)), "regression_tests") - -TEMPLATE = """ -class Test{name}(Regression): - _file = "{filename}" - - def setup_cache(self): - return _evaulate_config(self._file) - -""" - -if __name__ == '__main__': - os.chdir(CONFIG_DIR) - - with open("regression_test.py", "a") as f: - for filename in sorted(glob.glob("*.yaml")): - splits = re.findall(r"\w+", osp.splitext(filename)[0]) - test_name = "".join([s.capitalize() for s in splits]) - f.write(TEMPLATE.format(name=test_name, filename=filename)) diff --git a/python/ray/rllib/tuned_examples/regression_tests/__init__.py b/python/ray/rllib/tuned_examples/regression_tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c-pytorch.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c-pytorch.yaml deleted file mode 100644 index a25da3c77..000000000 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c-pytorch.yaml +++ /dev/null @@ -1,10 +0,0 @@ -cartpole-a3c: - env: CartPole-v0 - run: A3C - stop: - episode_reward_mean: 200 - time_total_s: 600 - config: - num_workers: 1 - gamma: 0.95 - use_pytorch: true diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml index f20ea73c3..08ff2206f 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml @@ -2,8 +2,8 @@ cartpole-a3c: env: CartPole-v0 run: A3C stop: - episode_reward_mean: 200 - time_total_s: 600 + episode_reward_mean: 100 + timesteps_total: 100000 config: num_workers: 1 gamma: 0.95 diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml index 550170c2e..bae79b2bb 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml @@ -2,15 +2,15 @@ cartpole-ars: env: CartPole-v0 run: ARS stop: - episode_reward_mean: 200 - time_total_s: 600 + episode_reward_mean: 50 + timesteps_total: 500000 config: noise_stdev: 0.02 num_rollouts: 50 rollouts_used: 25 num_workers: 2 sgd_stepsize: 0.01 - noise_size: 250000000 + noise_size: 25000000 eval_prob: 0.5 - policy_type: MLPPolicy - fcnet_hiddens: [16, 16] + model: + fcnet_hiddens: [] # a linear policy diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml index 04aa2dc6e..5a6ba5033 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml @@ -2,8 +2,8 @@ cartpole-dqn: env: CartPole-v0 run: DQN stop: - episode_reward_mean: 200 - time_total_s: 600 + episode_reward_mean: 150 + timesteps_total: 50000 config: n_step: 3 gamma: 0.95 diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml index a0246f1e2..5c411188d 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml @@ -2,8 +2,8 @@ cartpole-es: env: CartPole-v0 run: ES stop: - episode_reward_mean: 200 - time_total_s: 300 + episode_reward_mean: 75 + timesteps_total: 400000 config: num_workers: 2 noise_size: 25000000 diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml index 2bf9e7548..58c29e9e5 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml @@ -2,7 +2,7 @@ cartpole-pg: env: CartPole-v0 run: PG stop: - episode_reward_mean: 200 - time_total_s: 300 + episode_reward_mean: 100 + timesteps_total: 100000 config: - num_workers: 1 + num_workers: 0 diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml index 82ea5846e..3f326cf83 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml @@ -2,8 +2,8 @@ cartpole-ppo: env: CartPole-v0 run: PPO stop: - episode_reward_mean: 200 - time_total_s: 300 + episode_reward_mean: 150 + timesteps_total: 100000 config: num_workers: 1 batch_mode: complete_episodes diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml index 124f756ec..696c251c9 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml @@ -2,8 +2,8 @@ pendulum-ddpg: env: Pendulum-v0 run: DDPG stop: - episode_reward_mean: -160 - time_total_s: 900 + episode_reward_mean: -900 + timesteps_total: 100000 config: use_huber: True clip_rewards: False diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml index 63536d3be..015429110 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml @@ -2,12 +2,13 @@ pendulum-ppo: env: Pendulum-v0 run: PPO stop: - episode_reward_mean: -160 - timesteps_total: 600000 + episode_reward_mean: -200 + timesteps_total: 500000 config: train_batch_size: 2048 vf_clip_param: 10.0 - num_workers: 4 + num_workers: 0 + num_envs_per_worker: 10 lambda: 0.1 gamma: 0.95 lr: 0.0003 diff --git a/python/ray/rllib/tuned_examples/regression_tests/regression_test.py b/python/ray/rllib/tuned_examples/regression_tests/regression_test.py deleted file mode 100644 index ff994b904..000000000 --- a/python/ray/rllib/tuned_examples/regression_tests/regression_test.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -""" -This class runs the regression YAMLs in the ASV format. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict -import numpy as np -import os -import yaml - -import ray -from ray import tune - -CONFIG_DIR = os.path.dirname(os.path.abspath(__file__)) - - -def _evaulate_config(filename): - with open(os.path.join(CONFIG_DIR, filename)) as f: - experiments = yaml.load(f) - for _, config in experiments.items(): - config["num_samples"] = 3 - ray.init() - trials = tune.run_experiments(experiments) - results = defaultdict(list) - for t in trials: - results["time_total_s"] += [t.last_result["time_total_s"]] - results["episode_reward_mean"] += [ - t.last_result["episode_reward_mean"] - ] - results["training_iteration"] += [t.last_result["training_iteration"]] - - return {k: np.median(v) for k, v in results.items()} - - -class Regression(): - def setup_cache(self): - # We need to implement this in separate classes - # below so that ASV will register the setup/class - # as a separate test. - raise NotImplementedError - - def teardown(self, *args): - ray.shutdown() - - def track_time(self, result): - return result["time_total_s"] - - def track_reward(self, result): - return result["episode_reward_mean"] - - def track_iterations(self, result): - return result["training_iteration"] diff --git a/python/ray/rllib/tuned_examples/run_regression_tests.py b/python/ray/rllib/tuned_examples/run_regression_tests.py deleted file mode 100755 index 823da327c..000000000 --- a/python/ray/rllib/tuned_examples/run_regression_tests.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# This script runs all the integration tests for RLlib. -# TODO(ekl) add large-scale tests on different envs here. - -import glob -import yaml - -import ray -from ray.tune import run_experiments - -if __name__ == '__main__': - experiments = {} - - for test in glob.glob("regression_tests/*.yaml"): - config = yaml.load(open(test).read()) - experiments.update(config) - - print("== Test config ==") - print(yaml.dump(experiments)) - - ray.init() - trials = run_experiments(experiments) - - num_failures = 0 - for t in trials: - if (t.last_result["episode_reward_mean"] < - t.stopping_criterion["episode_reward_mean"]): - num_failures += 1 - - if num_failures: - raise Exception("{} trials did not converge".format(num_failures)) diff --git a/test/jenkins_tests/run_multi_node_tests.sh b/test/jenkins_tests/run_multi_node_tests.sh index 20acbd225..f3b0c69b0 100755 --- a/test/jenkins_tests/run_multi_node_tests.sh +++ b/test/jenkins_tests/run_multi_node_tests.sh @@ -299,6 +299,12 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/python/ray/rllib/test/test_rollout.sh +# Run all single-agent regression tests (3x retry each) +for yaml in $(ls $ROOT_DIR/../../python/ray/rllib/tuned_examples/regression_tests); do + docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + python /ray/python/ray/rllib/test/run_regression_tests.py /ray/python/ray/rllib/tuned_examples/regression_tests/$yaml +done + # Try a couple times since it's stochastic docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/rllib/test/multiagent_pendulum.py || \