Fix TF2 / rllib test (#5846)

2026-06-27 19:32:11 +08:00 · 2019-10-07 14:25:16 -07:00
parent 9bb3633cd9
commit 04e997fe0d
5 changed files with 12 additions and 12 deletions
@@ -340,12 +340,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50

-docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
-
-docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
-    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
-
 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50

@@ -495,3 +489,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \

 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
    /ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py
+
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
+
+docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
@@ -11,7 +11,7 @@ from ray.rllib.utils.annotations import override
 from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.policy.policy import Policy
 from ray.rllib.policy.tf_policy import TFPolicy
-from ray.rllib.utils import try_import_tf
+from ray.rllib.utils import try_import_tf, try_import_tfp

 import logging
 from gym.spaces import Box, Discrete
@@ -20,6 +20,7 @@ import numpy as np
 logger = logging.getLogger(__name__)

 tf = try_import_tf()
+tfp = try_import_tfp()


 class MADDPGPostprocessing(object):
@@ -349,7 +350,6 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                             hiddens,
                             activation=None,
                             scope=None):
-        from tensorflow.contrib.distributions import RelaxedOneHotCategorical
        with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
            if self.config["use_state_preprocessor"]:
                model = ModelCatalog.get_model({
@@ -365,7 +365,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
                out = tf.layers.dense(out, units=hidden, activation=activation)
            feature = tf.layers.dense(
                out, units=act_space.shape[0], activation=None)
-            sampler = RelaxedOneHotCategorical(
+            sampler = tfp.distributions.RelaxedOneHotCategorical(
                temperature=1.0, logits=feature).sample()

        return sampler, feature, model, tf.global_variables(scope.name)
@@ -47,7 +47,7 @@ class MyKerasRNN(RecurrentTFModelV2):
            shape=(None, obs_space.shape[0]), name="inputs")
        state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
        state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
-        seq_in = tf.keras.layers.Input(shape=(), name="seq_in")
+        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        # Preprocess observation with a hidden layer and send to LSTM cell
        dense1 = tf.keras.layers.Dense(
@@ -34,7 +34,7 @@ class RecurrentTFModelV2(TFModelV2):
                    shape=(None, obs_space.shape[0]))
                state_in_h = tf.keras.layers.Input(shape=(256, ))
                state_in_c = tf.keras.layers.Input(shape=(256, ))
-                seq_in = tf.keras.layers.Input(shape=())
+                seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32)

                # Send to LSTM cell
                lstm_out, state_h, state_c = tf.keras.layers.LSTM(
@@ -236,7 +236,7 @@ def build_eager_tf_policy(name,
                    [_flatten_action(action_space.sample())]),
                SampleBatch.PREV_REWARDS: tf.convert_to_tensor([0.]),
            }, [
-                tf.convert_to_tensor([s])
+                tf.convert_to_tensor(np.array([s]))
                for s in self.model.get_initial_state()
            ], tf.convert_to_tensor([1]))