From 9e9c5248238629a00fb8eeeffcd10fc5e54cf76d Mon Sep 17 00:00:00 2001
From: Zack Polizzi
Date: Thu, 12 Dec 2019 10:57:55 -0800
Subject: [PATCH] Update pong-apex tuned example (#6462)

---
 rllib/tuned_examples/pong-apex.yaml | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/rllib/tuned_examples/pong-apex.yaml b/rllib/tuned_examples/pong-apex.yaml
index e0eee0a63..28097900c 100644
--- a/rllib/tuned_examples/pong-apex.yaml
+++ b/rllib/tuned_examples/pong-apex.yaml
@@ -1,13 +1,14 @@
-# This can be expected to reach 20.8 reward within an hour when using a V100 GPU
-# (e.g. p3.2xl instance on AWS, and m4.4xl workers). It also can reach ~21 reward
-# within an hour with fewer workers (e.g. 4-8) but less reliably.
+# This reaches ~20 reward in 50 minutes (6M train steps, 2M env steps) on a
+# p3.2xlarge AWS instance.
+# See https://app.wandb.ai/zplizzi/test/runs/ayuuhixr?workspace=user-zplizzi
+# for training curves.
 pong-apex:
     env: PongNoFrameskip-v4
     run: APEX
     config:
-        target_network_update_freq: 50000
-        num_workers: 32
-        ## can also enable vectorization within processes
-        # num_envs_per_worker: 4
-        lr: .0001
+        target_network_update_freq: 20000
+        num_workers: 4
+        num_envs_per_worker: 8
+        lr: .00005
+        train_batch_size: 64
         gamma: 0.99