[rllib] Run simple regressions tests for all algs in jenkins (#3498)

This commit is contained in:
Eric Liang
2018-12-11 17:21:53 -08:00
committed by GitHub
parent e0fbb68e47
commit 59f4743f20
17 changed files with 78 additions and 155 deletions
+10 -9
View File
@@ -6,25 +6,25 @@ matrix:
include:
- os: linux
dist: trusty
env: PYTHON=2.7
env: PYTHON=2.7 PYTHONWARNINGS=ignore
- os: linux
dist: trusty
env: PYTHON=3.5
env: PYTHON=3.5 PYTHONWARNINGS=ignore
- os: osx
osx_image: xcode7
env: PYTHON=2.7
env: PYTHON=2.7 PYTHONWARNINGS=ignore
- os: osx
osx_image: xcode7
env: PYTHON=3.5
env: PYTHON=3.5 PYTHONWARNINGS=ignore
- os: linux
dist: trusty
env:
- JDK='Oracle JDK 8'
- PYTHON=3.5
- PYTHON=3.5 PYTHONWARNINGS=ignore
install:
- ./.travis/install-dependencies.sh
- export PATH="$HOME/miniconda/bin:$PATH"
@@ -33,7 +33,7 @@ matrix:
- os: linux
dist: trusty
env: LINT=1
env: LINT=1 PYTHONWARNINGS=ignore
before_install:
# In case we ever want to use a different version of clang-format:
#- wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
@@ -58,7 +58,7 @@ matrix:
- os: linux
dist: trusty
env: VALGRIND=1 PYTHON=2.7
env: VALGRIND=1 PYTHON=2.7 PYTHONWARNINGS=ignore
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq valgrind
@@ -75,7 +75,7 @@ matrix:
# Build Linux wheels.
- os: linux
dist: trusty
env: LINUX_WHEELS=1
env: LINUX_WHEELS=1 PYTHONWARNINGS=ignore
install:
- ./.travis/install-dependencies.sh
# This command should be kept in sync with ray/python/README-building-wheels.md.
@@ -86,7 +86,7 @@ matrix:
# Build MacOS wheels.
- os: osx
osx_image: xcode7
env: MAC_WHEELS=1
env: MAC_WHEELS=1 PYTHONWARNINGS=ignore
install:
- ./.travis/install-dependencies.sh
# This command should be kept in sync with ray/python/README-building-wheels.md.
@@ -100,6 +100,7 @@ matrix:
env:
- PYTHON=3.5
- RAY_USE_NEW_GCS=on
- PYTHONWARNINGS=ignore
install:
+1 -1
View File
@@ -305,7 +305,7 @@ class ARSAgent(Agent):
num_episodes, num_timesteps = 0, 0
results = []
while num_episodes < min_episodes:
logger.info(
logger.debug(
"Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
rollout_ids = [
@@ -0,0 +1,39 @@
#!/usr/bin/env python
# Runs one or more regression tests. Retries tests up to 3 times.
#
# Example usage:
# ./run_regression_tests.py regression-tests/cartpole-es.yaml
import yaml
import sys
import ray
from ray.tune import run_experiments
if __name__ == '__main__':
    # Entry point: run every regression-test YAML named on the command line.
    #
    # Each test file gets up to 3 attempts, because training is stochastic
    # and a single run may miss its reward target. The process exits 0 only
    # if *every* test passed within its retries; it exits 1 as soon as one
    # test exhausts its retries, or if no test files were given at all.
    tests = sys.argv[1:]

    ray.init()
    for test in tests:
        # Use a context manager so the file handle is closed promptly, and
        # safe_load because the configs are plain data (yaml.load without an
        # explicit Loader is deprecated and rejected by newer PyYAML).
        with open(test) as f:
            experiments = yaml.safe_load(f)

        print("== Test config ==")
        print(yaml.dump(experiments))

        for i in range(3):
            trials = run_experiments(experiments)

            # A trial fails when its final mean reward is below the reward
            # threshold listed in its own stopping criterion.
            num_failures = sum(
                1 for t in trials
                if (t.last_result["episode_reward_mean"] <
                    t.stopping_criterion["episode_reward_mean"]))

            if not num_failures:
                print("Regression test PASSED")
                # Move on to the next test file instead of exiting, so that
                # later tests on the command line are not silently skipped.
                break

            print("Regression test flaked, retry", i)
        else:
            # The retry loop finished without a passing attempt.
            print("Regression test FAILED")
            sys.exit(1)

    if not tests:
        # Preserve the previous behavior: running with no tests is a failure.
        print("Regression test FAILED")
        sys.exit(1)

    sys.exit(0)
@@ -1,27 +0,0 @@
#!/usr/bin/env python
# This script generates all the regression tests for RLlib.
import glob
import re
import os
import os.path as osp
# Directory named "regression_tests" located next to this script.
CONFIG_DIR = osp.join(osp.dirname(osp.abspath(__file__)), "regression_tests")
# Source-code template for one generated test class. It is filled in with
# `name` (the class-name suffix) and `filename` (the YAML file) via
# str.format and appended to regression_test.py by the main block below.
TEMPLATE = """
class Test{name}(Regression):
_file = "{filename}"
def setup_cache(self):
return _evaulate_config(self._file)
"""
if __name__ == '__main__':
    # Append one generated test class per YAML config found in CONFIG_DIR
    # to regression_test.py (opened in append mode, relative to CONFIG_DIR).
    os.chdir(CONFIG_DIR)
    yaml_files = sorted(glob.glob("*.yaml"))
    with open("regression_test.py", "a") as out:
        for filename in yaml_files:
            # Build a CamelCase class suffix from the word-character runs
            # of the file stem, e.g. "cartpole-a3c" -> "CartpoleA3c".
            stem = osp.splitext(filename)[0]
            words = re.findall(r"\w+", stem)
            test_name = "".join(word.capitalize() for word in words)
            out.write(TEMPLATE.format(name=test_name, filename=filename))
@@ -1,10 +0,0 @@
cartpole-a3c:
env: CartPole-v0
run: A3C
stop:
episode_reward_mean: 200
time_total_s: 600
config:
num_workers: 1
gamma: 0.95
use_pytorch: true
@@ -2,8 +2,8 @@ cartpole-a3c:
env: CartPole-v0
run: A3C
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 100
timesteps_total: 100000
config:
num_workers: 1
gamma: 0.95
@@ -2,15 +2,15 @@ cartpole-ars:
env: CartPole-v0
run: ARS
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 50
timesteps_total: 500000
config:
noise_stdev: 0.02
num_rollouts: 50
rollouts_used: 25
num_workers: 2
sgd_stepsize: 0.01
noise_size: 250000000
noise_size: 25000000
eval_prob: 0.5
policy_type: MLPPolicy
fcnet_hiddens: [16, 16]
model:
fcnet_hiddens: [] # a linear policy
@@ -2,8 +2,8 @@ cartpole-dqn:
env: CartPole-v0
run: DQN
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 150
timesteps_total: 50000
config:
n_step: 3
gamma: 0.95
@@ -2,8 +2,8 @@ cartpole-es:
env: CartPole-v0
run: ES
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 75
timesteps_total: 400000
config:
num_workers: 2
noise_size: 25000000
@@ -2,7 +2,7 @@ cartpole-pg:
env: CartPole-v0
run: PG
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 100
timesteps_total: 100000
config:
num_workers: 1
num_workers: 0
@@ -2,8 +2,8 @@ cartpole-ppo:
env: CartPole-v0
run: PPO
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 150
timesteps_total: 100000
config:
num_workers: 1
batch_mode: complete_episodes
@@ -2,8 +2,8 @@ pendulum-ddpg:
env: Pendulum-v0
run: DDPG
stop:
episode_reward_mean: -160
time_total_s: 900
episode_reward_mean: -900
timesteps_total: 100000
config:
use_huber: True
clip_rewards: False
@@ -2,12 +2,13 @@ pendulum-ppo:
env: Pendulum-v0
run: PPO
stop:
episode_reward_mean: -160
timesteps_total: 600000
episode_reward_mean: -200
timesteps_total: 500000
config:
train_batch_size: 2048
vf_clip_param: 10.0
num_workers: 4
num_workers: 0
num_envs_per_worker: 10
lambda: 0.1
gamma: 0.95
lr: 0.0003
@@ -1,56 +0,0 @@
#!/usr/bin/env python
"""
This class runs the regression YAMLs in the ASV format.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
import numpy as np
import os
import yaml
import ray
from ray import tune
CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
def _evaulate_config(filename):
    """Run the experiments in a regression YAML and summarize the results.

    The YAML file is read from CONFIG_DIR, every experiment in it is set to
    run with ``num_samples = 3``, Ray is initialized, and the experiments
    are executed with ``tune.run_experiments``.

    Args:
        filename: Name of the YAML file, relative to CONFIG_DIR.

    Returns:
        A dict mapping "time_total_s", "episode_reward_mean" and
        "training_iteration" to the median of that value over the last
        result of every trial. The dict is empty if no trials were returned.
    """
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        # safe_load: the configs are plain data, and yaml.load without an
        # explicit Loader is deprecated and rejected by newer PyYAML.
        experiments = yaml.safe_load(f)

    for config in experiments.values():
        config["num_samples"] = 3

    ray.init()
    trials = tune.run_experiments(experiments)

    tracked_keys = ("time_total_s", "episode_reward_mean",
                    "training_iteration")
    results = defaultdict(list)
    for t in trials:
        for key in tracked_keys:
            results[key].append(t.last_result[key])

    return {k: np.median(v) for k, v in results.items()}
class Regression():
    """Base class for the generated regression benchmarks.

    Subclasses supply ``setup_cache``; the ``track_*`` methods each read one
    entry from the result that is passed to them.
    """

    def setup_cache(self):
        """Must be overridden by each concrete test class."""
        # We need to implement this in separate classes
        # below so that ASV will register the setup/class
        # as a separate test.
        raise NotImplementedError

    def teardown(self, *args):
        """Shut Ray down; any positional arguments are ignored."""
        ray.shutdown()

    def track_time(self, result):
        """Return the "time_total_s" entry of ``result``."""
        return result["time_total_s"]

    def track_reward(self, result):
        """Return the "episode_reward_mean" entry of ``result``."""
        return result["episode_reward_mean"]

    def track_iterations(self, result):
        """Return the "training_iteration" entry of ``result``."""
        return result["training_iteration"]
@@ -1,31 +0,0 @@
#!/usr/bin/env python
# This script runs all the integration tests for RLlib.
# TODO(ekl) add large-scale tests on different envs here.
import glob
import yaml
import ray
from ray.tune import run_experiments
if __name__ == '__main__':
    # Merge every regression-test YAML into one experiment dict, run them
    # all in a single Tune invocation, and fail if any trial ends below the
    # reward threshold listed in its own stopping criterion.
    experiments = {}
    for test in glob.glob("regression_tests/*.yaml"):
        # Context manager closes the handle; safe_load because the configs
        # are plain data and yaml.load without a Loader is deprecated.
        with open(test) as f:
            experiments.update(yaml.safe_load(f))

    print("== Test config ==")
    print(yaml.dump(experiments))

    ray.init()
    trials = run_experiments(experiments)

    num_failures = sum(
        1 for t in trials
        if (t.last_result["episode_reward_mean"] <
            t.stopping_criterion["episode_reward_mean"]))

    if num_failures:
        raise Exception("{} trials did not converge".format(num_failures))
@@ -299,6 +299,12 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
/ray/python/ray/rllib/test/test_rollout.sh
# Run all single-agent regression tests (3x retry each).
# Iterate over a glob instead of parsing `ls` output: this avoids word
# splitting on unusual file names and only picks up the *.yaml configs.
for yaml in "$ROOT_DIR"/../../python/ray/rllib/tuned_examples/regression_tests/*.yaml; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$yaml" ] || continue
    docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
        python /ray/python/ray/rllib/test/run_regression_tests.py \
        /ray/python/ray/rllib/tuned_examples/regression_tests/"$(basename "$yaml")"
done
# Try a couple times since it's stochastic
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
python /ray/python/ray/rllib/test/multiagent_pendulum.py || \