From 4a6cfee887235183eb523a80f60b1c8eae3480ed Mon Sep 17 00:00:00 2001
From: Eric Liang <ekhliang@gmail.com>
Date: Sun, 18 Feb 2018 00:46:42 -0800
Subject: [PATCH] [rllib] add tuned example for pendulum (#1552)

---
 python/ray/rllib/ppo/ppo.py                       |  2 +-
 python/ray/rllib/tuned_examples/pendulum-ppo.yaml | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 python/ray/rllib/tuned_examples/pendulum-ppo.yaml

diff --git a/python/ray/rllib/ppo/ppo.py b/python/ray/rllib/ppo/ppo.py
index 46a43299f..8228764ec 100644
--- a/python/ray/rllib/ppo/ppo.py
+++ b/python/ray/rllib/ppo/ppo.py
@@ -64,7 +64,7 @@ DEFAULT_CONFIG = {
     "timesteps_per_batch": 4000,
     # Each tasks performs rollouts until at least this
     # number of steps is obtained
-    "min_steps_per_task": 1000,
+    "min_steps_per_task": 200,
     # Number of actors used to collect the rollouts
     "num_workers": 5,
     # Resource requirements for remote actors
diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml
new file mode 100644
index 000000000..abad14ff1
--- /dev/null
+++ b/python/ray/rllib/tuned_examples/pendulum-ppo.yaml
@@ -0,0 +1,15 @@
+# Expect reward to improve to around -140 within ~300-500k timesteps.
+pendulum-ppo:
+    env: Pendulum-v0
+    run: PPO
+    resources:
+        cpu: 4
+    config:
+        timesteps_per_batch: 2048
+        lambda: 0.1
+        gamma: 0.95
+        sgd_stepsize: 0.0003
+        sgd_batchsize: 64
+        num_sgd_iter: 10
+        model:
+            fcnet_hiddens: [64, 64]