mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 16:58:23 +08:00
Checkpoint Adam momenta for DDPG (#7449)
This commit is contained in:
@@ -295,7 +295,8 @@ class DDPGTFPolicy(DDPGPostprocessing, TFPolicy):
         # Note that this encompasses both the policy and Q-value networks and
         # their corresponding target networks
         self.variables = ray.experimental.tf_utils.TensorFlowVariables(
-            tf.group(q_t_det_policy, q_tp1), self.sess)
+            tf.group(q_t_det_policy, q_tp1, self._actor_optimizer.variables(),
+                     self._critic_optimizer.variables()), self.sess)
 
         # Hard initial update
         self.update_target(tau=1.0)
Reference in New Issue
Block a user