diff --git a/rllib/models/tf/fcnet.py b/rllib/models/tf/fcnet.py index b016b5a97..36f0f3819 100644 --- a/rllib/models/tf/fcnet.py +++ b/rllib/models/tf/fcnet.py @@ -33,7 +33,7 @@ class FullyConnectedNetwork(TFModelV2): # We are using obs_flat, so take the flattened shape as input. inputs = tf.keras.layers.Input( - shape=(np.product(obs_space.shape), ), name="observations") + shape=(int(np.product(obs_space.shape)), ), name="observations") # Last hidden layer output (before logits outputs). last_layer = inputs # The action distribution outputs. @@ -75,7 +75,7 @@ class FullyConnectedNetwork(TFModelV2): # Adjust num_outputs to be the number of nodes in the last layer. else: self.num_outputs = ( - [np.product(obs_space.shape)] + hiddens[-1:])[-1] + [int(np.product(obs_space.shape))] + hiddens[-1:])[-1] # Concat the log std vars to the end of the state-dependent means. if free_log_std and logits_out is not None: diff --git a/rllib/models/torch/fcnet.py b/rllib/models/torch/fcnet.py index 109b282b5..747fc221a 100644 --- a/rllib/models/torch/fcnet.py +++ b/rllib/models/torch/fcnet.py @@ -78,7 +78,7 @@ class FullyConnectedNetwork(TorchModelV2, nn.Module): activation_fn=None) else: self.num_outputs = ( - [np.product(obs_space.shape)] + hiddens[-1:])[-1] + [int(np.product(obs_space.shape))] + hiddens[-1:])[-1] # Layer to add the log std vars to the state-dependent means. if self.free_log_std and self._logits: