diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy.py b/python/ray/rllib/agents/ddpg/ddpg_policy.py index 2ddc18950..5aa1aa710 100644 --- a/python/ray/rllib/agents/ddpg/ddpg_policy.py +++ b/python/ray/rllib/agents/ddpg/ddpg_policy.py @@ -58,7 +58,10 @@ class DDPGPostprocessing(object): distance_in_action_space = np.sqrt( np.mean(np.square(clean_actions - noisy_actions))) self.pi_distance = distance_in_action_space - if distance_in_action_space < self.config["exploration_ou_sigma"]: + if distance_in_action_space < \ + self.config["exploration_ou_sigma"] * self.cur_noise_scale: + # multiplying the sampled OU noise by noise scale is + # equivalent to multiplying the sigma of OU by noise scale self.parameter_noise_sigma_val *= 1.01 else: self.parameter_noise_sigma_val /= 1.01