diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh
index 9935985d9..72d96fa0a 100755
--- a/ci/jenkins_tests/run_rllib_tests.sh
+++ b/ci/jenkins_tests/run_rllib_tests.sh
@@ -340,12 +340,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50
 
-docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
-
-docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
-
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50
 
@@ -495,3 +489,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     /ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py
+
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
+
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
diff --git a/rllib/contrib/maddpg/maddpg_policy.py b/rllib/contrib/maddpg/maddpg_policy.py
index bae7c1d7d..d0acdaa7c 100644
--- a/rllib/contrib/maddpg/maddpg_policy.py
+++ b/rllib/contrib/maddpg/maddpg_policy.py
@@ -11,7 +11,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.policy.policy import Policy
 from ray.rllib.policy.tf_policy import TFPolicy
-from ray.rllib.utils import try_import_tf
+from ray.rllib.utils import try_import_tf, try_import_tfp
 
 import logging
 from gym.spaces import Box, Discrete
@@ -20,6 +20,7 @@ import numpy as np
 logger = logging.getLogger(__name__)
 
 tf = try_import_tf()
+tfp = try_import_tfp()
 
 
 class MADDPGPostprocessing(object):
@@ -349,7 +350,6 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                              hiddens,
                              activation=None,
                              scope=None):
-        from tensorflow.contrib.distributions import RelaxedOneHotCategorical
         with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
             if self.config["use_state_preprocessor"]:
                 model = ModelCatalog.get_model({
@@ -365,7 +365,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                 out = tf.layers.dense(out, units=hidden, activation=activation)
             feature = tf.layers.dense(
                 out, units=act_space.shape[0], activation=None)
-            sampler = RelaxedOneHotCategorical(
+            sampler = tfp.distributions.RelaxedOneHotCategorical(
                 temperature=1.0, logits=feature).sample()
 
         return sampler, feature, model, tf.global_variables(scope.name)
diff --git a/rllib/examples/custom_keras_rnn_model.py b/rllib/examples/custom_keras_rnn_model.py
index fe3f40c86..791cb487e 100644
--- a/rllib/examples/custom_keras_rnn_model.py
+++ b/rllib/examples/custom_keras_rnn_model.py
@@ -47,7 +47,7 @@ class MyKerasRNN(RecurrentTFModelV2):
             shape=(None, obs_space.shape[0]), name="inputs")
         state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
         state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
-        seq_in = tf.keras.layers.Input(shape=(), name="seq_in")
+        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
 
         # Preprocess observation with a hidden layer and send to LSTM cell
         dense1 = tf.keras.layers.Dense(
diff --git a/rllib/models/tf/recurrent_tf_modelv2.py b/rllib/models/tf/recurrent_tf_modelv2.py
index b5f69c9f9..2691a11ba 100644
--- a/rllib/models/tf/recurrent_tf_modelv2.py
+++ b/rllib/models/tf/recurrent_tf_modelv2.py
@@ -34,7 +34,7 @@ class RecurrentTFModelV2(TFModelV2):
                     shape=(None, obs_space.shape[0]))
                 state_in_h = tf.keras.layers.Input(shape=(256, ))
                 state_in_c = tf.keras.layers.Input(shape=(256, ))
-                seq_in = tf.keras.layers.Input(shape=())
+                seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32)
 
                 # Send to LSTM cell
                 lstm_out, state_h, state_c = tf.keras.layers.LSTM(
diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py
index 63b96378e..5c09e2464 100644
--- a/rllib/policy/eager_tf_policy.py
+++ b/rllib/policy/eager_tf_policy.py
@@ -236,7 +236,7 @@ def build_eager_tf_policy(name,
                     [_flatten_action(action_space.sample())]),
                 SampleBatch.PREV_REWARDS: tf.convert_to_tensor([0.]),
             }, [
-                tf.convert_to_tensor([s])
+                tf.convert_to_tensor(np.array([s]))
                 for s in self.model.get_initial_state()
             ], tf.convert_to_tensor([1]))