diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 9deeaace6..22c44ee9d 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -38,7 +38,7 @@ COMMON_CONFIG = { # Whether to synchronize the statistics of remote filters. "synchronize_filters": True, # Whether to clip rewards prior to experience postprocessing - "clip_rewards": True, + "clip_rewards": False, # Whether to use rllib or deepmind preprocessors "preprocessor_pref": "deepmind", # Arguments to pass to the env creator diff --git a/python/ray/rllib/tuned_examples/atari-a2c.yaml b/python/ray/rllib/tuned_examples/atari-a2c.yaml index 6976649d9..89feaee5b 100644 --- a/python/ray/rllib/tuned_examples/atari-a2c.yaml +++ b/python/ray/rllib/tuned_examples/atari-a2c.yaml @@ -10,6 +10,7 @@ atari-a2c: run: A2C config: sample_batch_size: 100 + clip_rewards: True num_workers: 5 num_envs_per_worker: 5 gpu: true diff --git a/python/ray/rllib/tuned_examples/atari-impala.yaml b/python/ray/rllib/tuned_examples/atari-impala.yaml index 8632e9b08..85bd801ff 100644 --- a/python/ray/rllib/tuned_examples/atari-impala.yaml +++ b/python/ray/rllib/tuned_examples/atari-impala.yaml @@ -13,6 +13,7 @@ atari-impala: train_batch_size: 500 num_workers: 32 num_envs_per_worker: 5 + clip_rewards: True lr_schedule: [ [0, 0.0005], [20000000, 0.000000000001], diff --git a/python/ray/rllib/tuned_examples/atari-ppo.yaml b/python/ray/rllib/tuned_examples/atari-ppo.yaml index 159160e0b..0ee6929aa 100644 --- a/python/ray/rllib/tuned_examples/atari-ppo.yaml +++ b/python/ray/rllib/tuned_examples/atari-ppo.yaml @@ -12,6 +12,7 @@ atari-ppo: lambda: 0.95 kl_coeff: 0.5 clip_param: 0.1 + clip_rewards: True entropy_coeff: 0.01 train_batch_size: 5000 sample_batch_size: 500