mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 20:06:31 +08:00
[rllib] Add some more tuned atari results to documentation (#2991)
* dqn results ++
* add scale
* hour
* fix
* small dqn table
* update
* steps
* upd
* apex
* up
* add apex results
* tip
This commit is contained in:
@@ -0,0 +1,34 @@
|
||||
# Runs on a single g3.16xl AWS machine
|
||||
apex:
|
||||
env:
|
||||
grid_search:
|
||||
- BreakoutNoFrameskip-v4
|
||||
- BeamRiderNoFrameskip-v4
|
||||
- QbertNoFrameskip-v4
|
||||
- SpaceInvadersNoFrameskip-v4
|
||||
run: APEX
|
||||
config:
|
||||
double_q: false
|
||||
dueling: false
|
||||
num_atoms: 1
|
||||
noisy: false
|
||||
n_step: 3
|
||||
lr: .0001
|
||||
adam_epsilon: .00015
|
||||
hiddens: [512]
|
||||
buffer_size: 1000000
|
||||
schedule_max_timesteps: 2000000
|
||||
exploration_final_eps: 0.01
|
||||
exploration_fraction: .1
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: false
|
||||
|
||||
# APEX
|
||||
num_workers: 8
|
||||
num_envs_per_worker: 8
|
||||
sample_batch_size: 158
|
||||
train_batch_size: 512
|
||||
target_network_update_freq: 50000
|
||||
timesteps_per_iteration: 25000
|
||||
@@ -0,0 +1,31 @@
|
||||
basic-dqn:
|
||||
env:
|
||||
grid_search:
|
||||
- BreakoutNoFrameskip-v4
|
||||
- BeamRiderNoFrameskip-v4
|
||||
- QbertNoFrameskip-v4
|
||||
- SpaceInvadersNoFrameskip-v4
|
||||
run: DQN
|
||||
config:
|
||||
double_q: false
|
||||
dueling: false
|
||||
num_atoms: 51
|
||||
noisy: false
|
||||
prioritized_replay: false
|
||||
n_step: 1
|
||||
target_network_update_freq: 8000
|
||||
lr: .0000625
|
||||
adam_epsilon: .00015
|
||||
hiddens: [512]
|
||||
learning_starts: 20000
|
||||
buffer_size: 1000000
|
||||
sample_batch_size: 4
|
||||
train_batch_size: 32
|
||||
schedule_max_timesteps: 2000000
|
||||
exploration_final_eps: 0.01
|
||||
exploration_fraction: .1
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
timesteps_per_iteration: 10000
|
||||
@@ -0,0 +1,33 @@
|
||||
# Runs on a single g3.16xl node
|
||||
# See https://github.com/ray-project/rl-experiments for results
|
||||
atari-basic-dqn:
|
||||
env:
|
||||
grid_search:
|
||||
- BreakoutNoFrameskip-v4
|
||||
- BeamRiderNoFrameskip-v4
|
||||
- QbertNoFrameskip-v4
|
||||
- SpaceInvadersNoFrameskip-v4
|
||||
run: DQN
|
||||
config:
|
||||
double_q: false
|
||||
dueling: false
|
||||
num_atoms: 1
|
||||
noisy: false
|
||||
prioritized_replay: false
|
||||
n_step: 1
|
||||
target_network_update_freq: 8000
|
||||
lr: .0000625
|
||||
adam_epsilon: .00015
|
||||
hiddens: [512]
|
||||
learning_starts: 20000
|
||||
buffer_size: 1000000
|
||||
sample_batch_size: 4
|
||||
train_batch_size: 32
|
||||
schedule_max_timesteps: 2000000
|
||||
exploration_final_eps: 0.01
|
||||
exploration_fraction: .1
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
timesteps_per_iteration: 10000
|
||||
@@ -0,0 +1,31 @@
|
||||
dueling-ddqn:
|
||||
env:
|
||||
grid_search:
|
||||
- BreakoutNoFrameskip-v4
|
||||
- BeamRiderNoFrameskip-v4
|
||||
- QbertNoFrameskip-v4
|
||||
- SpaceInvadersNoFrameskip-v4
|
||||
run: DQN
|
||||
config:
|
||||
double_q: true
|
||||
dueling: true
|
||||
num_atoms: 1
|
||||
noisy: false
|
||||
prioritized_replay: false
|
||||
n_step: 1
|
||||
target_network_update_freq: 8000
|
||||
lr: .0000625
|
||||
adam_epsilon: .00015
|
||||
hiddens: [512]
|
||||
learning_starts: 20000
|
||||
buffer_size: 1000000
|
||||
sample_batch_size: 4
|
||||
train_batch_size: 32
|
||||
schedule_max_timesteps: 2000000
|
||||
exploration_final_eps: 0.01
|
||||
exploration_fraction: .1
|
||||
prioritized_replay_alpha: 0.5
|
||||
beta_annealing_fraction: 1.0
|
||||
final_prioritized_replay_beta: 1.0
|
||||
gpu: true
|
||||
timesteps_per_iteration: 10000
|
||||
@@ -11,8 +11,9 @@ atari-ppo:
|
||||
config:
|
||||
lambda: 0.95
|
||||
kl_coeff: 0.5
|
||||
clip_param: 0.1
|
||||
clip_rewards: True
|
||||
clip_param: 0.1
|
||||
vf_clip_param: 10.0
|
||||
entropy_coeff: 0.01
|
||||
train_batch_size: 5000
|
||||
sample_batch_size: 500
|
||||
@@ -24,7 +25,3 @@ atari-ppo:
|
||||
observation_filter: NoFilter
|
||||
vf_share_layers: true
|
||||
num_gpus: 1
|
||||
lr_schedule: [
|
||||
[0, 0.0007],
|
||||
[20000000, 0.000000000001],
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user