[rllib] Add tuned_examples/pong-ppo.yaml (#1302)

* Add tuned_examples/pong-ppo.yaml: 21 rew in ~3380sec * Header comments
2026-07-02 19:56:44 +08:00 · 2017-12-09 01:20:22 -08:00
parent 2606001a36
commit 7e4a28f933
1 changed files with 19 additions and 0 deletions
@@ -0,0 +1,19 @@
+# On a Tesla K80 GPU, this achieves the maximum reward in about 1-1.5 hours.
+#
+# $ python train.py -f tuned_examples/pong-ppo.yaml --num-gpus=1
+#
+# - PPO_PongDeterministic-v4_0:  TERMINATED [pid=16387], 4984 s, 1117981 ts, 21 rew
+# - PPO_PongDeterministic-v4_0:  TERMINATED [pid=83606], 4592 s, 1068671 ts, 21 rew
+#
+pong-deterministic-ppo:  # experiment name; agrees with `run: PPO` below
+    env: PongDeterministic-v4
+    run: PPO
+    resources:
+        cpu: 1
+        gpu: 1  # the sample command in the header passes --num-gpus=1
+    stop:
+        episode_reward_mean: 21  # header's sample runs terminate at 21 rew
+    config:
+        gamma: 0.99
+        num_sgd_iter: 20  # presumably SGD passes per batch -- TODO confirm
+        devices: ["/gpu:0"]  # one GPU device, consistent with `gpu: 1`