From 739ddfa2290ecbbeeae572a7ab1743055379e3a0 Mon Sep 17 00:00:00 2001
From: Sergey Kolesnikov
Date: Mon, 25 Jun 2018 23:05:27 +0300
Subject: [PATCH] Fix APEX update target (#2300)

* apex hotfix

small hotfix for Apex work

* Also patch the dqn version
---
 python/ray/rllib/ddpg/apex.py | 2 +-
 python/ray/rllib/dqn/apex.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/ray/rllib/ddpg/apex.py b/python/ray/rllib/ddpg/apex.py
index 1799e71a8..7a3b5c598 100644
--- a/python/ray/rllib/ddpg/apex.py
+++ b/python/ray/rllib/ddpg/apex.py
@@ -45,6 +45,6 @@ class ApexDDPGAgent(DDPGAgent):
         # Ape-X updates based on num steps trained, not sampled
         if self.optimizer.num_steps_trained - self.last_target_update_ts > \
                 self.config["target_network_update_freq"]:
-            self.local_evaluator.update_target()
+            self.local_evaluator.for_policy(lambda p: p.update_target())
             self.last_target_update_ts = self.optimizer.num_steps_trained
             self.num_target_updates += 1
diff --git a/python/ray/rllib/dqn/apex.py b/python/ray/rllib/dqn/apex.py
index 34e6ecd91..bfb58d02a 100644
--- a/python/ray/rllib/dqn/apex.py
+++ b/python/ray/rllib/dqn/apex.py
@@ -56,6 +56,6 @@ class ApexAgent(DQNAgent):
         # Ape-X updates based on num steps trained, not sampled
         if self.optimizer.num_steps_trained - self.last_target_update_ts > \
                 self.config["target_network_update_freq"]:
-            self.local_evaluator.update_target()
+            self.local_evaluator.for_policy(lambda p: p.update_target())
             self.last_target_update_ts = self.optimizer.num_steps_trained
             self.num_target_updates += 1