Fixed inconsistent action placeholder (#5213)

This commit is contained in:
Jones Wong
2019-07-18 01:55:14 +08:00
committed by Eric Liang
parent 214f09d969
commit ae03c42dd6
@@ -110,7 +110,7 @@ class MARWILPolicy(MARWILPostprocessing, TFPolicy):
self.output_actions = action_dist.sample()
# Training inputs
-self.act_t = tf.placeholder(tf.int32, [None], name="action")
+self.act_t = ModelCatalog.get_action_placeholder(action_space)
self.cum_rew_t = tf.placeholder(tf.float32, [None], name="reward")
# v network evaluation