[rllib] Add tuned_examples/pong-ppo.yaml (#1302)

* Add tuned_examples/pong-ppo.yaml: 21 rew in ~3380sec

* Header comments
This commit is contained in:
Zongheng Yang
2017-12-09 01:20:22 -08:00
committed by Richard Liaw
parent 2606001a36
commit 7e4a28f933
@@ -0,0 +1,19 @@
# On a Tesla K80 GPU, this reaches the maximum reward (21) in about 1-1.5 hours.
#
# $ python train.py -f tuned_examples/pong-ppo.yaml --num-gpus=1
#
# - PPO_PongDeterministic-v4_0: TERMINATED [pid=16387], 4984 s, 1117981 ts, 21 rew
# - PPO_PongDeterministic-v4_0: TERMINATED [pid=83606], 4592 s, 1068671 ts, 21 rew
#
# Single experiment: PPO on PongDeterministic-v4.
# NOTE(review): the experiment key says "dqn" but `run` is PPO. This looks like
# a copy-paste leftover; confirm nothing depends on the name before renaming.
pong-deterministic-dqn:
  env: PongDeterministic-v4
  run: PPO
  # One CPU and one GPU for the trial (the header command passes --num-gpus=1).
  resources:
    cpu: 1
    gpu: 1
  # Terminate once the mean episode reward reaches 21, the value both sample
  # runs in the header finished at.
  stop:
    episode_reward_mean: 21
  # Presumably forwarded to the PPO agent as its config; verify key names
  # against the agent's defaults.
  config:
    gamma: 0.99
    num_sgd_iter: 20
    devices: ["/gpu:0"]