diff --git a/python/ray/rllib/ddpg/apex.py b/python/ray/rllib/ddpg/apex.py index 1799e71a8..7a3b5c598 100644 --- a/python/ray/rllib/ddpg/apex.py +++ b/python/ray/rllib/ddpg/apex.py @@ -45,6 +45,6 @@ class ApexDDPGAgent(DDPGAgent): # Ape-X updates based on num steps trained, not sampled if self.optimizer.num_steps_trained - self.last_target_update_ts > \ self.config["target_network_update_freq"]: - self.local_evaluator.update_target() + self.local_evaluator.for_policy(lambda p: p.update_target()) self.last_target_update_ts = self.optimizer.num_steps_trained self.num_target_updates += 1 diff --git a/python/ray/rllib/dqn/apex.py b/python/ray/rllib/dqn/apex.py index 34e6ecd91..bfb58d02a 100644 --- a/python/ray/rllib/dqn/apex.py +++ b/python/ray/rllib/dqn/apex.py @@ -56,6 +56,6 @@ class ApexAgent(DQNAgent): # Ape-X updates based on num steps trained, not sampled if self.optimizer.num_steps_trained - self.last_target_update_ts > \ self.config["target_network_update_freq"]: - self.local_evaluator.update_target() + self.local_evaluator.for_policy(lambda p: p.update_target()) self.last_target_update_ts = self.optimizer.num_steps_trained self.num_target_updates += 1