[rllib] add tuned example for pendulum (#1552)

2026-06-29 15:27:47 +08:00 · 2018-02-18 00:46:42 -08:00
parent 61d8a17de0
commit 4a6cfee887
2 changed files with 16 additions and 1 deletions
@@ -64,7 +64,7 @@ DEFAULT_CONFIG = {
    "timesteps_per_batch": 4000,
    # Each tasks performs rollouts until at least this
    # number of steps is obtained
-    "min_steps_per_task": 1000,
+    "min_steps_per_task": 200,
    # Number of actors used to collect the rollouts
    "num_workers": 5,
    # Resource requirements for remote actors
@@ -0,0 +1,15 @@
+# Tuned example: can expect improvement to about -140 reward in ~300-500k timesteps.
+pendulum-ppo:  # top-level key; presumably the experiment name -- confirm
+    env: Pendulum-v0
+    run: PPO
+    resources:
+        cpu: 4
+    config:  # presumably overrides the algorithm's default config -- confirm
+        timesteps_per_batch: 2048
+        lambda: 0.1  # presumably the GAE lambda -- TODO confirm
+        gamma: 0.95  # presumably the reward discount factor -- TODO confirm
+        sgd_stepsize: 0.0003
+        sgd_batchsize: 64
+        num_sgd_iter: 10
+        model:
+            fcnet_hiddens: [64, 64]  # presumably hidden-layer sizes -- confirm