diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py index 46a43299f..8228764ec 100644 --- a/python/ray/rllib/ppo/ppo.py +++ b/python/ray/rllib/ppo/ppo.py @@ -64,7 +64,7 @@ DEFAULT_CONFIG = { "timesteps_per_batch": 4000, # Each tasks performs rollouts until at least this # number of steps is obtained - "min_steps_per_task": 1000, + "min_steps_per_task": 200, # Number of actors used to collect the rollouts "num_workers": 5, # Resource requirements for remote actors diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml new file mode 100644 index 000000000..abad14ff1 --- /dev/null +++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml @@ -0,0 +1,15 @@ +# can expect improvement to -140 reward in ~300-500k timesteps +pendulum-ppo: + env: Pendulum-v0 + run: PPO + resources: + cpu: 4 + config: + timesteps_per_batch: 2048 + lambda: 0.1 + gamma: 0.95 + sgd_stepsize: 0.0003 + sgd_batchsize: 64 + num_sgd_iter: 10 + model: + fcnet_hiddens: [64, 64]