mirror of
https://github.com/wassname/ray.git
synced 2026-07-04 10:29:45 +08:00
[rllib] Reward clipping should default to off
This commit is contained in:
@@ -38,7 +38,7 @@ COMMON_CONFIG = {
|
||||
# Whether to synchronize the statistics of remote filters.
|
||||
"synchronize_filters": True,
|
||||
# Whether to clip rewards prior to experience postprocessing
|
||||
- "clip_rewards": True,
|
||||
+ "clip_rewards": False,
|
||||
# Whether to use rllib or deepmind preprocessors
|
||||
"preprocessor_pref": "deepmind",
|
||||
# Arguments to pass to the env creator
|
||||
|
||||
@@ -10,6 +10,7 @@ atari-a2c:
|
||||
run: A2C
|
||||
config:
|
||||
sample_batch_size: 100
|
||||
+ clip_rewards: True
|
||||
num_workers: 5
|
||||
num_envs_per_worker: 5
|
||||
gpu: true
|
||||
|
||||
@@ -13,6 +13,7 @@ atari-impala:
|
||||
train_batch_size: 500
|
||||
num_workers: 32
|
||||
num_envs_per_worker: 5
|
||||
+ clip_rewards: True
|
||||
lr_schedule: [
|
||||
[0, 0.0005],
|
||||
[20000000, 0.000000000001],
|
||||
|
||||
@@ -12,6 +12,7 @@ atari-ppo:
|
||||
lambda: 0.95
|
||||
kl_coeff: 0.5
|
||||
clip_param: 0.1
|
||||
+ clip_rewards: True
|
||||
entropy_coeff: 0.01
|
||||
train_batch_size: 5000
|
||||
sample_batch_size: 500
|
||||
|
||||
Reference in New Issue
Block a user