diff --git a/python/ray/rllib/tuned_examples/pong-ppo.yaml b/python/ray/rllib/tuned_examples/pong-ppo.yaml new file mode 100644 index 000000000..9fdf5debd --- /dev/null +++ b/python/ray/rllib/tuned_examples/pong-ppo.yaml @@ -0,0 +1,19 @@ +# On a Tesla K80 GPU, this achieves the maximum reward in about 1-1.5 hours. +# +# $ python train.py -f tuned_examples/pong-ppo.yaml --num-gpus=1 +# +# - PPO_PongDeterministic-v4_0: TERMINATED [pid=16387], 4984 s, 1117981 ts, 21 rew +# - PPO_PongDeterministic-v4_0: TERMINATED [pid=83606], 4592 s, 1068671 ts, 21 rew +# +pong-deterministic-dqn: + env: PongDeterministic-v4 + run: PPO + resources: + cpu: 1 + gpu: 1 + stop: + episode_reward_mean: 21 + config: + gamma: 0.99 + num_sgd_iter: 20 + devices: ["/gpu:0"]