[rllib] Run simple regressions tests for all algs in jenkins (#3498)

This commit is contained in:
Eric Liang
2018-12-11 17:21:53 -08:00
committed by GitHub
parent e0fbb68e47
commit 59f4743f20
17 changed files with 78 additions and 155 deletions
+1 -1
View File
@@ -305,7 +305,7 @@ class ARSAgent(Agent):
num_episodes, num_timesteps = 0, 0
results = []
while num_episodes < min_episodes:
logger.info(
logger.debug(
"Collected {} episodes {} timesteps so far this iter".format(
num_episodes, num_timesteps))
rollout_ids = [
@@ -0,0 +1,39 @@
#!/usr/bin/env python
# Runs one or more regression tests. Retries tests up to 3 times.
#
# Example usage:
#     ./run_regression_tests.py regression_tests/cartpole-es.yaml
import yaml
import sys
import ray
from ray.tune import run_experiments
def count_failed_trials(trials):
    """Return how many trials finished below their reward target.

    Args:
        trials: iterable of trial objects exposing ``last_result`` (a dict
            of the final reported metrics) and ``stopping_criterion`` (the
            ``stop`` section of the experiment config).

    Returns:
        The number of trials whose final ``episode_reward_mean`` is
        missing, NaN, or lower than the ``episode_reward_mean`` target.
    """
    failures = 0
    for trial in trials:
        reward = (trial.last_result or {}).get("episode_reward_mean")
        target = trial.stopping_criterion["episode_reward_mean"]
        # A trial that never reported a reward cannot have converged.
        # `not reward >= target` (instead of `reward < target`) also
        # counts a NaN reward as a failure.
        if reward is None or not reward >= target:
            failures += 1
    return failures


def passes_with_retries(experiments, max_attempts=3):
    """Run the experiments until every trial converges.

    Args:
        experiments: experiment spec dict as loaded from a regression YAML.
        max_attempts: how many times to run the experiments before giving
            up; RL training is noisy, so a single miss is treated as a
            flake.

    Returns:
        True as soon as one attempt has no failed trials, False if all
        attempts had at least one.
    """
    for attempt in range(max_attempts):
        trials = run_experiments(experiments)
        if not count_failed_trials(trials):
            return True
        print("Regression test flaked, retry", attempt)
    return False


if __name__ == '__main__':
    ray.init()

    for test in sys.argv[1:]:
        # safe_load works on every PyYAML version (plain yaml.load needs an
        # explicit Loader on newer releases) and the configs are plain data.
        with open(test) as f:
            experiments = yaml.safe_load(f)

        print("== Test config ==")
        print(yaml.dump(experiments))

        if not passes_with_retries(experiments):
            print("Regression test FAILED")
            sys.exit(1)

        # Keep going: the remaining files on the command line still have
        # to be checked before the run can be declared successful.
        print("Regression test PASSED")

    sys.exit(0)
@@ -1,27 +0,0 @@
#!/usr/bin/env python
# This script generates all the regression tests for RLlib.
import glob
import re
import os
import os.path as osp
# Directory holding the regression YAML configs; the generated test classes
# are appended to regression_test.py inside this directory.
CONFIG_DIR = osp.join(osp.dirname(osp.abspath(__file__)), "regression_tests")

# Source for one generated test class per YAML config. The body must be
# indented so that the emitted text is valid Python. `_evaulate_config`
# (sic) is the helper's actual name in regression_test.py, so the spelling
# is kept as-is.
TEMPLATE = """
class Test{name}(Regression):
    _file = "{filename}"

    def setup_cache(self):
        return _evaulate_config(self._file)

"""

if __name__ == '__main__':
    os.chdir(CONFIG_DIR)

    # Append mode: the classes are added after the existing contents of
    # regression_test.py.
    with open("regression_test.py", "a") as f:
        for filename in sorted(glob.glob("*.yaml")):
            # "cartpole-es.yaml" -> ["cartpole", "es"] -> "CartpoleEs"
            splits = re.findall(r"\w+", osp.splitext(filename)[0])
            test_name = "".join(s.capitalize() for s in splits)
            f.write(TEMPLATE.format(name=test_name, filename=filename))
@@ -1,10 +0,0 @@
# A3C with the PyTorch policy on CartPole-v0.
cartpole-a3c:
    env: CartPole-v0
    run: A3C
    # Stopping criteria for the trial (reward target and wall-clock budget).
    stop:
        episode_reward_mean: 200
        time_total_s: 600
    config:
        num_workers: 1
        gamma: 0.95
        use_pytorch: true
@@ -2,8 +2,8 @@ cartpole-a3c:
env: CartPole-v0
run: A3C
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 100
timesteps_total: 100000
config:
num_workers: 1
gamma: 0.95
@@ -2,15 +2,15 @@ cartpole-ars:
env: CartPole-v0
run: ARS
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 50
timesteps_total: 500000
config:
noise_stdev: 0.02
num_rollouts: 50
rollouts_used: 25
num_workers: 2
sgd_stepsize: 0.01
noise_size: 250000000
noise_size: 25000000
eval_prob: 0.5
policy_type: MLPPolicy
fcnet_hiddens: [16, 16]
model:
fcnet_hiddens: [] # a linear policy
@@ -2,8 +2,8 @@ cartpole-dqn:
env: CartPole-v0
run: DQN
stop:
episode_reward_mean: 200
time_total_s: 600
episode_reward_mean: 150
timesteps_total: 50000
config:
n_step: 3
gamma: 0.95
@@ -2,8 +2,8 @@ cartpole-es:
env: CartPole-v0
run: ES
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 75
timesteps_total: 400000
config:
num_workers: 2
noise_size: 25000000
@@ -2,7 +2,7 @@ cartpole-pg:
env: CartPole-v0
run: PG
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 100
timesteps_total: 100000
config:
num_workers: 1
num_workers: 0
@@ -2,8 +2,8 @@ cartpole-ppo:
env: CartPole-v0
run: PPO
stop:
episode_reward_mean: 200
time_total_s: 300
episode_reward_mean: 150
timesteps_total: 100000
config:
num_workers: 1
batch_mode: complete_episodes
@@ -2,8 +2,8 @@ pendulum-ddpg:
env: Pendulum-v0
run: DDPG
stop:
episode_reward_mean: -160
time_total_s: 900
episode_reward_mean: -900
timesteps_total: 100000
config:
use_huber: True
clip_rewards: False
@@ -2,12 +2,13 @@ pendulum-ppo:
env: Pendulum-v0
run: PPO
stop:
episode_reward_mean: -160
timesteps_total: 600000
episode_reward_mean: -200
timesteps_total: 500000
config:
train_batch_size: 2048
vf_clip_param: 10.0
num_workers: 4
num_workers: 0
num_envs_per_worker: 10
lambda: 0.1
gamma: 0.95
lr: 0.0003
@@ -1,56 +0,0 @@
#!/usr/bin/env python
"""
This module runs the regression YAMLs in the ASV (airspeed velocity) format.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
import numpy as np
import os
import yaml
import ray
from ray import tune
# Directory containing this file; regression YAML names are resolved
# relative to it.
CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))


def _evaulate_config(filename):
    """Run one regression YAML and summarize the final trial metrics.

    Every experiment in the file is run with ``num_samples`` forced to 3,
    and the last reported result of each trial is collected.

    Args:
        filename: name of the YAML config, relative to ``CONFIG_DIR``.

    Returns:
        Dict mapping ``time_total_s``, ``episode_reward_mean`` and
        ``training_iteration`` to the median of that metric over all
        trials (empty if no trial was run).
    """
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        # safe_load works on every PyYAML version (plain yaml.load needs an
        # explicit Loader on newer releases); the configs are plain data.
        experiments = yaml.safe_load(f)

    for config in experiments.values():
        config["num_samples"] = 3

    # Matched by ray.shutdown() in Regression.teardown.
    ray.init()
    trials = tune.run_experiments(experiments)

    metrics = ("time_total_s", "episode_reward_mean", "training_iteration")
    results = defaultdict(list)
    for t in trials:
        for metric in metrics:
            results[metric].append(t.last_result[metric])

    return {k: np.median(v) for k, v in results.items()}
class Regression:
    """Base class for the per-config regression benchmarks.

    The value returned by ``setup_cache`` is handed to ``teardown`` and to
    each ``track_*`` method as ``result``; every ``track_*`` method reports
    one entry of that dict.
    """

    def setup_cache(self):
        """Produce the result dict; must be overridden by subclasses."""
        # We need to implement this in separate classes
        # below so that ASV will register the setup/class
        # as a separate test.
        raise NotImplementedError

    def teardown(self, *args):
        """Shut Ray down once the benchmark is finished."""
        ray.shutdown()

    def track_time(self, result):
        """Report the ``time_total_s`` entry of the result dict."""
        return result["time_total_s"]

    def track_reward(self, result):
        """Report the ``episode_reward_mean`` entry of the result dict."""
        return result["episode_reward_mean"]

    def track_iterations(self, result):
        """Report the ``training_iteration`` entry of the result dict."""
        return result["training_iteration"]
@@ -1,31 +0,0 @@
#!/usr/bin/env python
# This script runs all the integration tests for RLlib.
# TODO(ekl) add large-scale tests on different envs here.
import glob
import yaml
import ray
from ray.tune import run_experiments
if __name__ == '__main__':
    # Merge every regression YAML into one experiment spec so that all
    # trials are scheduled in a single run_experiments() call.
    experiments = {}
    for test in sorted(glob.glob("regression_tests/*.yaml")):
        # safe_load works on every PyYAML version (plain yaml.load needs an
        # explicit Loader on newer releases); the configs are plain data.
        with open(test) as f:
            config = yaml.safe_load(f)
        experiments.update(config)

    print("== Test config ==")
    print(yaml.dump(experiments))

    ray.init()
    trials = run_experiments(experiments)

    num_failures = 0
    for t in trials:
        reward = (t.last_result or {}).get("episode_reward_mean")
        target = t.stopping_criterion["episode_reward_mean"]
        # A trial without a reported reward cannot have converged, and
        # `not reward >= target` also counts a NaN reward as a failure.
        if reward is None or not reward >= target:
            num_failures += 1

    if num_failures:
        raise Exception("{} trials did not converge".format(num_failures))