mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 19:32:11 +08:00
Fix TF2 / rllib test (#5846)
This commit is contained in:
@@ -340,12 +340,6 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50
|
||||
|
||||
@@ -495,3 +489,9 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv
|
||||
|
||||
docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
/ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv
|
||||
|
||||
@@ -11,7 +11,7 @@ from ray.rllib.utils.annotations import override
|
||||
from ray.rllib.utils.error import UnsupportedSpaceException
|
||||
from ray.rllib.policy.policy import Policy
|
||||
from ray.rllib.policy.tf_policy import TFPolicy
|
||||
from ray.rllib.utils import try_import_tf
|
||||
from ray.rllib.utils import try_import_tf, try_import_tfp
|
||||
|
||||
import logging
|
||||
from gym.spaces import Box, Discrete
|
||||
@@ -20,6 +20,7 @@ import numpy as np
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
tf = try_import_tf()
|
||||
tfp = try_import_tfp()
|
||||
|
||||
|
||||
class MADDPGPostprocessing(object):
|
||||
@@ -349,7 +350,6 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
|
||||
hiddens,
|
||||
activation=None,
|
||||
scope=None):
|
||||
from tensorflow.contrib.distributions import RelaxedOneHotCategorical
|
||||
with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope:
|
||||
if self.config["use_state_preprocessor"]:
|
||||
model = ModelCatalog.get_model({
|
||||
@@ -365,7 +365,7 @@ class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy):
|
||||
out = tf.layers.dense(out, units=hidden, activation=activation)
|
||||
feature = tf.layers.dense(
|
||||
out, units=act_space.shape[0], activation=None)
|
||||
sampler = RelaxedOneHotCategorical(
|
||||
sampler = tfp.distributions.RelaxedOneHotCategorical(
|
||||
temperature=1.0, logits=feature).sample()
|
||||
|
||||
return sampler, feature, model, tf.global_variables(scope.name)
|
||||
|
||||
@@ -47,7 +47,7 @@ class MyKerasRNN(RecurrentTFModelV2):
|
||||
shape=(None, obs_space.shape[0]), name="inputs")
|
||||
state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h")
|
||||
state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c")
|
||||
seq_in = tf.keras.layers.Input(shape=(), name="seq_in")
|
||||
seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)
|
||||
|
||||
# Preprocess observation with a hidden layer and send to LSTM cell
|
||||
dense1 = tf.keras.layers.Dense(
|
||||
|
||||
@@ -34,7 +34,7 @@ class RecurrentTFModelV2(TFModelV2):
|
||||
shape=(None, obs_space.shape[0]))
|
||||
state_in_h = tf.keras.layers.Input(shape=(256, ))
|
||||
state_in_c = tf.keras.layers.Input(shape=(256, ))
|
||||
seq_in = tf.keras.layers.Input(shape=())
|
||||
seq_in = tf.keras.layers.Input(shape=(), dtype=tf.int32)
|
||||
|
||||
# Send to LSTM cell
|
||||
lstm_out, state_h, state_c = tf.keras.layers.LSTM(
|
||||
|
||||
@@ -236,7 +236,7 @@ def build_eager_tf_policy(name,
|
||||
[_flatten_action(action_space.sample())]),
|
||||
SampleBatch.PREV_REWARDS: tf.convert_to_tensor([0.]),
|
||||
}, [
|
||||
tf.convert_to_tensor([s])
|
||||
tf.convert_to_tensor(np.array([s]))
|
||||
for s in self.model.get_initial_state()
|
||||
], tf.convert_to_tensor([1]))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user