mirror of
https://github.com/wassname/ray.git
synced 2026-07-02 19:56:44 +08:00
[rllib] Add tuned_examples/pong-ppo.yaml (#1302)
* Add tuned_examples/pong-ppo.yaml: 21 rew in ~3380sec * Header comments
This commit is contained in:
committed by
Richard Liaw
parent
2606001a36
commit
7e4a28f933
@@ -0,0 +1,19 @@
|
||||
# On a Tesla K80 GPU, this achieves the maximum reward in about 1-1.5 hours.
|
||||
#
|
||||
# $ python train.py -f tuned_examples/pong-ppo.yaml --num-gpus=1
|
||||
#
|
||||
# - PPO_PongDeterministic-v4_0: TERMINATED [pid=16387], 4984 s, 1117981 ts, 21 rew
|
||||
# - PPO_PongDeterministic-v4_0: TERMINATED [pid=83606], 4592 s, 1068671 ts, 21 rew
|
||||
#
|
||||
# Experiment name. The trial below runs PPO (see `run`), so the key is
# labelled "ppo" rather than "dqn".
pong-deterministic-ppo:
  env: PongDeterministic-v4
  run: PPO
  # Resources requested for the trial: one CPU and one GPU.
  resources:
    cpu: 1
    gpu: 1
  # Stop once the mean episode reward reaches 21, the reward reported by
  # the reference runs listed in the header comment.
  stop:
    episode_reward_mean: 21
  # Settings handed to the PPO run.
  config:
    gamma: 0.99
    # NOTE(review): presumably the number of SGD passes per training
    # batch -- confirm against the PPO config documentation.
    num_sgd_iter: 20
    # NOTE(review): presumably pins training to the single GPU requested
    # under `resources` -- confirm.
    devices: ["/gpu:0"]
|
||||
Reference in New Issue
Block a user