From 4a6cfee887235183eb523a80f60b1c8eae3480ed Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Sun, 18 Feb 2018 00:46:42 -0800 Subject: [PATCH] [rllib] add tuned example for pendulum (#1552) --- python/ray/rllib/ppo/ppo.py | 2 +- python/ray/rllib/tuned_examples/pendulum-ppo.yaml | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 python/ray/rllib/tuned_examples/pendulum-ppo.yaml diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py index 46a43299f..8228764ec 100644 --- a/python/ray/rllib/ppo/ppo.py +++ b/python/ray/rllib/ppo/ppo.py @@ -64,7 +64,7 @@ DEFAULT_CONFIG = { "timesteps_per_batch": 4000, # Each tasks performs rollouts until at least this # number of steps is obtained - "min_steps_per_task": 1000, + "min_steps_per_task": 200, # Number of actors used to collect the rollouts "num_workers": 5, # Resource requirements for remote actors diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml new file mode 100644 index 000000000..abad14ff1 --- /dev/null +++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml @@ -0,0 +1,15 @@ +# can expect improvement to -140 reward in ~300-500k timesteps +pendulum-ppo: + env: Pendulum-v0 + run: PPO + resources: + cpu: 4 + config: + timesteps_per_batch: 2048 + lambda: 0.1 + gamma: 0.95 + sgd_stepsize: 0.0003 + sgd_batchsize: 64 + num_sgd_iter: 10 + model: + fcnet_hiddens: [64, 64]