[rllib] Reward clipping should default to off

This commit is contained in:
Eric Liang
2018-09-18 15:08:01 -07:00
committed by GitHub
parent 715ec1bca5
commit ab8348b1f5
4 changed files with 4 additions and 1 deletion
+1 -1
View File
@@ -38,7 +38,7 @@ COMMON_CONFIG = {
# Whether to synchronize the statistics of remote filters.
"synchronize_filters": True,
# Whether to clip rewards prior to experience postprocessing
-"clip_rewards": True,
+"clip_rewards": False,
# Whether to use rllib or deepmind preprocessors
"preprocessor_pref": "deepmind",
# Arguments to pass to the env creator
@@ -10,6 +10,7 @@ atari-a2c:
run: A2C
config:
sample_batch_size: 100
+clip_rewards: True
num_workers: 5
num_envs_per_worker: 5
gpu: true
@@ -13,6 +13,7 @@ atari-impala:
train_batch_size: 500
num_workers: 32
num_envs_per_worker: 5
+clip_rewards: True
lr_schedule: [
[0, 0.0005],
[20000000, 0.000000000001],
@@ -12,6 +12,7 @@ atari-ppo:
lambda: 0.95
kl_coeff: 0.5
clip_param: 0.1
+clip_rewards: True
entropy_coeff: 0.01
train_batch_size: 5000
sample_batch_size: 500