mirror of
https://github.com/wassname/ray.git
synced 2026-07-05 18:48:25 +08:00
Fixes Inconsistent weight assignment operations in DQNPolicyGraph (#4502) (#4504)
* Fixes Inconsistent weight assignment operations in DQNPolicyGraph (#4502)
* Update dqn_policy_graph.py
This commit is contained in:
@@ -431,9 +431,9 @@ class DQNPolicyGraph(DQNPostprocessing, TFPolicyGraph):
|
||||
# update_target_fn will be called periodically to copy Q network to
|
||||
# target Q network
|
||||
update_target_expr = []
|
||||
- for var, var_target in zip(
|
||||
- sorted(self.q_func_vars, key=lambda v: v.name),
|
||||
- sorted(self.target_q_func_vars, key=lambda v: v.name)):
|
||||
+ assert len(self.q_func_vars) == len(self.target_q_func_vars), \
|
||||
+ (self.q_func_vars, self.target_q_func_vars)
|
||||
+ for var, var_target in zip(self.q_func_vars, self.target_q_func_vars):
|
||||
update_target_expr.append(var_target.assign(var))
|
||||
self.update_target_expr = tf.group(*update_target_expr)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user