mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 01:31:08 +08:00
[rllib] Run simple regressions tests for all algs in jenkins (#3498)
This commit is contained in:
@@ -305,7 +305,7 @@ class ARSAgent(Agent):
|
||||
num_episodes, num_timesteps = 0, 0
|
||||
results = []
|
||||
while num_episodes < min_episodes:
|
||||
logger.info(
|
||||
logger.debug(
|
||||
"Collected {} episodes {} timesteps so far this iter".format(
|
||||
num_episodes, num_timesteps))
|
||||
rollout_ids = [
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env python
|
||||
# Runs one or more regression tests. Retries tests up to 3 times.
|
||||
#
|
||||
# Example usage:
|
||||
#     ./run_regression_tests.py regression_tests/cartpole-es.yaml
|
||||
|
||||
import yaml
|
||||
import sys
|
||||
|
||||
import ray
|
||||
from ray.tune import run_experiments
|
||||
|
||||
# Metric compared against each trial's stopping criterion.
REWARD_KEY = "episode_reward_mean"

# A test is attempted this many times before it is declared failed.
MAX_ATTEMPTS = 3


def count_failures(trials):
    """Return how many trials finished below their target mean reward.

    Args:
        trials: iterable of trial objects exposing ``last_result`` and
            ``stopping_criterion`` mappings.

    Returns:
        The number of trials whose last reported mean reward is lower
        than the mean reward named in their stopping criterion. A trial
        that reported no reward at all is counted as a failure.
    """
    num_failures = 0
    for t in trials:
        # NOTE(review): assumes last_result is a dict or None when a trial
        # produced no result -- confirm against the Trial class.
        reward = (t.last_result or {}).get(REWARD_KEY)
        if reward is None or reward < t.stopping_criterion[REWARD_KEY]:
            num_failures += 1
    return num_failures


def run_test(test):
    """Run one regression YAML, retrying flaky runs.

    Args:
        test: path to a YAML file describing the experiments to run.

    Returns:
        True as soon as one attempt has no failing trials, False if all
        MAX_ATTEMPTS attempts had at least one failing trial.
    """
    with open(test) as f:
        # safe_load only builds plain Python objects; yaml.load without an
        # explicit Loader is unsafe and rejected by recent PyYAML releases.
        experiments = yaml.safe_load(f)

    print("== Test config ==")
    print(yaml.dump(experiments))

    for i in range(MAX_ATTEMPTS):
        trials = run_experiments(experiments)

        if not count_failures(trials):
            print("Regression test PASSED")
            return True

        print("Regression test flaked, retry", i)

    print("Regression test FAILED")
    return False


if __name__ == '__main__':
    ray.init()

    # Run every test given on the command line. Stop at the first one that
    # still fails after all retries; exit 0 only when all of them passed.
    for test in sys.argv[1:]:
        if not run_test(test):
            sys.exit(1)

    sys.exit(0)
|
||||
@@ -1,27 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# This script generates all the regression tests for RLlib.
|
||||
|
||||
import glob
|
||||
import re
|
||||
import os
|
||||
import os.path as osp
|
||||
|
||||
CONFIG_DIR = osp.join(osp.dirname(osp.abspath(__file__)), "regression_tests")
|
||||
|
||||
TEMPLATE = """
|
||||
class Test{name}(Regression):
|
||||
_file = "{filename}"
|
||||
|
||||
def setup_cache(self):
|
||||
return _evaulate_config(self._file)
|
||||
|
||||
"""
|
||||
|
||||
if __name__ == '__main__':
    # Work from the config directory: both the glob pattern and the output
    # file name below are relative paths.
    os.chdir(CONFIG_DIR)

    # Mode "a" appends, so whatever regression_test.py already contains
    # is kept and the generated classes are added after it.
    with open("regression_test.py", "a") as out:
        yaml_files = glob.glob("*.yaml")
        yaml_files.sort()
        for yaml_file in yaml_files:
            # "cartpole-es.yaml" -> ["cartpole", "es"] -> "CartpoleEs"
            stem, _ = osp.splitext(yaml_file)
            words = re.findall(r"\w+", stem)
            class_suffix = "".join(word.capitalize() for word in words)
            out.write(
                TEMPLATE.format(name=class_suffix, filename=yaml_file))
|
||||
@@ -1,10 +0,0 @@
|
||||
cartpole-a3c:
|
||||
env: CartPole-v0
|
||||
run: A3C
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 600
|
||||
config:
|
||||
num_workers: 1
|
||||
gamma: 0.95
|
||||
use_pytorch: true
|
||||
@@ -2,8 +2,8 @@ cartpole-a3c:
|
||||
env: CartPole-v0
|
||||
run: A3C
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 600
|
||||
episode_reward_mean: 100
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
num_workers: 1
|
||||
gamma: 0.95
|
||||
|
||||
@@ -2,15 +2,15 @@ cartpole-ars:
|
||||
env: CartPole-v0
|
||||
run: ARS
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 600
|
||||
episode_reward_mean: 50
|
||||
timesteps_total: 500000
|
||||
config:
|
||||
noise_stdev: 0.02
|
||||
num_rollouts: 50
|
||||
rollouts_used: 25
|
||||
num_workers: 2
|
||||
sgd_stepsize: 0.01
|
||||
noise_size: 250000000
|
||||
noise_size: 25000000
|
||||
eval_prob: 0.5
|
||||
policy_type: MLPPolicy
|
||||
fcnet_hiddens: [16, 16]
|
||||
model:
|
||||
fcnet_hiddens: [] # a linear policy
|
||||
|
||||
@@ -2,8 +2,8 @@ cartpole-dqn:
|
||||
env: CartPole-v0
|
||||
run: DQN
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 600
|
||||
episode_reward_mean: 150
|
||||
timesteps_total: 50000
|
||||
config:
|
||||
n_step: 3
|
||||
gamma: 0.95
|
||||
|
||||
@@ -2,8 +2,8 @@ cartpole-es:
|
||||
env: CartPole-v0
|
||||
run: ES
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 300
|
||||
episode_reward_mean: 75
|
||||
timesteps_total: 400000
|
||||
config:
|
||||
num_workers: 2
|
||||
noise_size: 25000000
|
||||
|
||||
@@ -2,7 +2,7 @@ cartpole-pg:
|
||||
env: CartPole-v0
|
||||
run: PG
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 300
|
||||
episode_reward_mean: 100
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
num_workers: 1
|
||||
num_workers: 0
|
||||
|
||||
@@ -2,8 +2,8 @@ cartpole-ppo:
|
||||
env: CartPole-v0
|
||||
run: PPO
|
||||
stop:
|
||||
episode_reward_mean: 200
|
||||
time_total_s: 300
|
||||
episode_reward_mean: 150
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
num_workers: 1
|
||||
batch_mode: complete_episodes
|
||||
|
||||
@@ -2,8 +2,8 @@ pendulum-ddpg:
|
||||
env: Pendulum-v0
|
||||
run: DDPG
|
||||
stop:
|
||||
episode_reward_mean: -160
|
||||
time_total_s: 900
|
||||
episode_reward_mean: -900
|
||||
timesteps_total: 100000
|
||||
config:
|
||||
use_huber: True
|
||||
clip_rewards: False
|
||||
|
||||
@@ -2,12 +2,13 @@ pendulum-ppo:
|
||||
env: Pendulum-v0
|
||||
run: PPO
|
||||
stop:
|
||||
episode_reward_mean: -160
|
||||
timesteps_total: 600000
|
||||
episode_reward_mean: -200
|
||||
timesteps_total: 500000
|
||||
config:
|
||||
train_batch_size: 2048
|
||||
vf_clip_param: 10.0
|
||||
num_workers: 4
|
||||
num_workers: 0
|
||||
num_envs_per_worker: 10
|
||||
lambda: 0.1
|
||||
gamma: 0.95
|
||||
lr: 0.0003
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""
|
||||
This class runs the regression YAMLs in the ASV format.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict
|
||||
import numpy as np
|
||||
import os
|
||||
import yaml
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
|
||||
CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def _evaulate_config(filename):
    """Run the experiments in a regression YAML and summarize the trials.

    The misspelled name is kept because callers reference it as written.

    Args:
        filename: name of a YAML file, resolved relative to CONFIG_DIR.

    Returns:
        dict mapping "time_total_s", "episode_reward_mean" and
        "training_iteration" to the median of that value over all trials
        (empty if no trials were returned).
    """
    with open(os.path.join(CONFIG_DIR, filename)) as f:
        # safe_load only builds plain Python objects; yaml.load without an
        # explicit Loader is unsafe and rejected by recent PyYAML releases.
        experiments = yaml.safe_load(f)

    # Run every experiment three times; the samples are reduced to a
    # median below.
    for config in experiments.values():
        config["num_samples"] = 3

    ray.init()
    trials = tune.run_experiments(experiments)

    tracked = ("time_total_s", "episode_reward_mean", "training_iteration")
    results = defaultdict(list)
    for t in trials:
        for key in tracked:
            results[key].append(t.last_result[key])

    return {k: np.median(v) for k, v in results.items()}
|
||||
|
||||
|
||||
class Regression:
    """Base class for the per-config regression benchmarks.

    Each ``track_*`` method reads one entry out of the ``result`` mapping
    it is handed (presumably the value returned by ``setup_cache`` --
    confirm against the ASV documentation).
    """

    def setup_cache(self):
        """Produce the result mapping; must be overridden.

        Every subclass has to define this itself so that ASV registers
        the setup/class pair as a separate test.
        """
        raise NotImplementedError

    def teardown(self, *args):
        """Shut Ray down; any positional arguments are ignored."""
        ray.shutdown()

    def track_time(self, result):
        """Return the "time_total_s" entry of ``result``."""
        return result["time_total_s"]

    def track_reward(self, result):
        """Return the "episode_reward_mean" entry of ``result``."""
        return result["episode_reward_mean"]

    def track_iterations(self, result):
        """Return the "training_iteration" entry of ``result``."""
        return result["training_iteration"]
|
||||
@@ -1,31 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# This script runs all the integration tests for RLlib.
|
||||
# TODO(ekl) add large-scale tests on different envs here.
|
||||
|
||||
import glob
|
||||
import yaml
|
||||
|
||||
import ray
|
||||
from ray.tune import run_experiments
|
||||
|
||||
if __name__ == '__main__':
    # Merge every regression YAML into one experiment spec so all of them
    # are launched by a single run_experiments() call.
    experiments = {}

    for test in glob.glob("regression_tests/*.yaml"):
        with open(test) as f:
            # safe_load only builds plain Python objects; yaml.load without
            # an explicit Loader is unsafe and rejected by recent PyYAML
            # releases.
            config = yaml.safe_load(f)
        # An empty YAML file loads as None; skip it rather than crash.
        if config:
            experiments.update(config)

    print("== Test config ==")
    print(yaml.dump(experiments))

    ray.init()
    trials = run_experiments(experiments)

    # A trial fails when its last mean reward is below the mean reward
    # named in its own stopping criterion.
    num_failures = sum(
        1 for t in trials
        if (t.last_result["episode_reward_mean"] <
            t.stopping_criterion["episode_reward_mean"]))

    if num_failures:
        raise Exception("{} trials did not converge".format(num_failures))
|
||||
Reference in New Issue
Block a user