Checkpoint Adam momenta for DDPG (#7449)

2026-07-03 16:58:23 +08:00 · 2020-03-04 10:03:41 -08:00
parent fe7820fec9
commit aa4861c2a0
1 changed files with 2 additions and 1 deletions
@@ -295,7 +295,8 @@ class DDPGTFPolicy(DDPGPostprocessing, TFPolicy):
        # Note that this encompasses both the policy and Q-value networks and
        # their corresponding target networks
        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
-            tf.group(q_t_det_policy, q_tp1), self.sess)
+            tf.group(q_t_det_policy, q_tp1, self._actor_optimizer.variables(),
+                     self._critic_optimizer.variables()), self.sess)

        # Hard initial update
        self.update_target(tau=1.0)