mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 17:56:52 +08:00
1475600c81
* removed ddpg2 * removed ddpg2 from codebase * added tests used in ddpg vs ddpg2 comparison * added notes about training timesteps to yaml files * removed ddpg2 yaml files * removed unnecessary configs from yaml files * removed unnecessary configs from yaml files * moved pendulum, mountaincarcontinuous, and halfcheetah tests to tuned_examples * moved pendulum, mountaincarcontinuous, and halfcheetah tests to tuned_examples * added more configuration details to yaml files * removed random starts from halfcheetah
68 lines
1.8 KiB
YAML
68 lines
1.8 KiB
YAML
# This configuration is expected to reach a mean episode reward of -160
# within 10k-20k timesteps.
pendulum-ddpg:
  env: Pendulum-v0
  run: DDPG
  # Stopping thresholds: a reward target and a time budget in seconds.
  stop:
    episode_reward_mean: -160
    time_total_s: 600  # 10 minutes
  config:
    # === Model ===
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]
    n_step: 1
    model: {}
    gamma: 0.99
    env_config: {}

    # === Exploration ===
    schedule_max_timesteps: 100000
    timesteps_per_iteration: 600
    exploration_fraction: 0.1
    exploration_final_eps: 0.02
    noise_scale: 0.1
    exploration_theta: 0.15
    exploration_sigma: 0.2
    target_network_update_freq: 0
    tau: 0.001

    # === Replay buffer ===
    buffer_size: 10000
    prioritized_replay: true
    prioritized_replay_alpha: 0.6
    prioritized_replay_beta: 0.4
    prioritized_replay_eps: 0.000001
    clip_rewards: false

    # === Optimization ===
    actor_lr: 0.0001
    critic_lr: 0.001
    use_huber: true
    huber_threshold: 1.0
    l2_reg: 0.000001
    learning_starts: 500
    sample_batch_size: 1
    train_batch_size: 64
    smoothing_num_episodes: 10

    # === TensorFlow ===
    # Written in block style; keys and values match the original
    # JSON-style mapping.
    tf_session_args:
      device_count:
        CPU: 2
      log_device_placement: false
      allow_soft_placement: true
      gpu_options:
        allow_growth: true
      inter_op_parallelism_threads: 1
      intra_op_parallelism_threads: 1

    # === Parallelism ===
    num_workers: 0
    num_gpus_per_worker: 0
    optimizer_class: "LocalSyncReplayOptimizer"
    optimizer_config: {}
    per_worker_exploration: false
    worker_side_prioritization: false