diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh index 9935985d9..72d96fa0a 100755 --- a/ci/jenkins_tests/run_rllib_tests.sh +++ b/ci/jenkins_tests/run_rllib_tests.sh @@ -340,12 +340,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50 -docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv - -docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv - docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50 @@ -495,3 +489,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ /ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py + +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv + +docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv diff --git a/rllib/contrib/maddpg/maddpg_policy.py b/rllib/contrib/maddpg/maddpg_policy.py index bae7c1d7d..d0acdaa7c 100644 --- a/rllib/contrib/maddpg/maddpg_policy.py +++ b/rllib/contrib/maddpg/maddpg_policy.py @@ -11,7 +11,7 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.policy.policy import Policy from ray.rllib.policy.tf_policy import TFPolicy -from ray.rllib.utils import try_import_tf +from ray.rllib.utils import try_import_tf, try_import_tfp import logging from gym.spaces import Box, Discrete @@ -20,6 +20,7 @@ import numpy as np logger = logging.getLogger(__name__) tf = try_import_tf() +tfp = try_import_tfp() class MADDPGPostprocessing(object): @@ -349,7 +350,6 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): hiddens, activation=None, scope=None): - from tensorflow.contrib.distributions import RelaxedOneHotCategorical with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: if self.config["use_state_preprocessor"]: model = ModelCatalog.get_model({ @@ -365,7 +365,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): out = tf.layers.dense(out, units=hidden, activation=activation) feature = tf.layers.dense( out, units=act_space.shape[0], activation=None) - sampler = RelaxedOneHotCategorical( + sampler = tfp.distributions.RelaxedOneHotCategorical( temperature=1.0, logits=feature).sample() return sampler, feature, model, tf.global_variables(scope.name) diff --git a/rllib/examples/custom_keras_rnn_model.py b/rllib/examples/custom_keras_rnn_model.py index fe3f40c86..791cb487e 100644 --- a/rllib/examples/custom_keras_rnn_model.py +++ b/rllib/examples/custom_keras_rnn_model.py @@ -47,7 +47,7 @@ class MyKerasRNN(RecurrentTFModelV2): shape=(None, obs_space.shape[0]), name="inputs") state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h") state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c") - seq_in = tf.keras.layers.Input(shape=(), name="seq_in") + seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32) # Preprocess observation with a hidden layer and send to LSTM cell dense1 = tf.keras.layers.Dense( diff --git a/rllib/models/tf/recurrent_tf_modelv2.py b/rllib/models/tf/recurrent_tf_modelv2.py index b5f69c9f9..2691a11ba 100644 --- a/rllib/models/tf/recurrent_tf_modelv2.py +++ b/rllib/models/tf/recurrent_tf_modelv2.py @@ -34,7 +34,7 @@ class RecurrentTFModelV2(TFModelV2): shape=(None, obs_space.shape[0])) state_in_h = tf.keras.layers.Input(shape=(256, )) state_in_c = tf.keras.layers.Input(shape=(256, )) - seq_in = tf.keras.layers.Input(shape=()) + seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32) # Send to LSTM cell lstm_out, state_h, state_c = tf.keras.layers.LSTM( diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index 63b96378e..5c09e2464 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -236,7 +236,7 @@ def build_eager_tf_policy(name, [_flatten_action(action_space.sample())]), SampleBatch.PREV_REWARDS: tf.convert_to_tensor([0.]), }, [ - tf.convert_to_tensor([s]) + tf.convert_to_tensor(np.array([s])) for s in self.model.get_initial_state() ], tf.convert_to_tensor([1]))