diff --git a/rllib/agents/ddpg/ddpg_policy.py b/rllib/agents/ddpg/ddpg_policy.py index 98c618aa8..5c44ec220 100644 --- a/rllib/agents/ddpg/ddpg_policy.py +++ b/rllib/agents/ddpg/ddpg_policy.py @@ -295,7 +295,8 @@ class DDPGTFPolicy(DDPGPostprocessing, TFPolicy): # Note that this encompasses both the policy and Q-value networks and # their corresponding target networks self.variables = ray.experimental.tf_utils.TensorFlowVariables( - tf.group(q_t_det_policy, q_tp1), self.sess) + tf.group(q_t_det_policy, q_tp1, self._actor_optimizer.variables(), + self._critic_optimizer.variables()), self.sess) # Hard initial update self.update_target(tau=1.0)