mirror of
https://github.com/wassname/ray.git
synced 2026-07-03 01:43:14 +08:00
Fixed inconsistent action placeholder (#5213)
This commit is contained in:
@@ -110,7 +110,7 @@ class MARWILPolicy(MARWILPostprocessing, TFPolicy):
         self.output_actions = action_dist.sample()
 
         # Training inputs
-        self.act_t = tf.placeholder(tf.int32, [None], name="action")
+        self.act_t = ModelCatalog.get_action_placeholder(action_space)
         self.cum_rew_t = tf.placeholder(tf.float32, [None], name="reward")
 
         # v network evaluation
Reference in New Issue
Block a user