From 2bcf72e306a5f3e0abd20ee692dbef820c0fd874 Mon Sep 17 00:00:00 2001 From: Sven Mika Date: Mon, 13 Jan 2020 21:48:16 -0800 Subject: [PATCH] DQN distributional model: Replace all legacy tf.contrib imports with tf.keras.layers.xyz or tf.initializers.xyz. (#6772) - This fixes a test case in test_evaluators.py. --- rllib/agents/dqn/distributional_q_model.py | 49 ++++++++++------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/rllib/agents/dqn/distributional_q_model.py b/rllib/agents/dqn/distributional_q_model.py index 765575962..858b8ee93 100644 --- a/rllib/agents/dqn/distributional_q_model.py +++ b/rllib/agents/dqn/distributional_q_model.py @@ -67,18 +67,17 @@ class DistributionalQModel(TFModelV2): action_out = self._noisy_layer( "hidden_%d" % i, action_out, q_hiddens[i], sigma0) elif parameter_noise: - import tensorflow.contrib.layers as layers - action_out = layers.fully_connected( - action_out, - num_outputs=q_hiddens[i], + action_out = tf.keras.layers.Dense( + units=q_hiddens[i], activation_fn=tf.nn.relu, - normalizer_fn=layers.layer_norm) + normalizer_fn=tf.keras.layers.LayerNormalization + )(action_out) else: - action_out = tf.layers.dense( - action_out, + action_out = tf.keras.layers.Dense( units=q_hiddens[i], activation=tf.nn.relu, - name="hidden_%d" % i) + name="hidden_%d" % i + )(action_out) else: # Avoid postprocessing the outputs. This enables custom models # to be used for parametric action DQN. @@ -91,10 +90,10 @@ class DistributionalQModel(TFModelV2): sigma0, non_linear=False) elif q_hiddens: - action_scores = tf.layers.dense( - action_out, + action_scores = tf.keras.layers.Dense( units=self.action_space.n * num_atoms, - activation=None) + activation=None + )(action_out) else: action_scores = model_out if num_atoms > 1: @@ -127,14 +126,15 @@ class DistributionalQModel(TFModelV2): state_out, q_hiddens[i], sigma0) elif parameter_noise: - state_out = tf.contrib.layers.fully_connected( - state_out, - num_outputs=q_hiddens[i], + state_out = tf.keras.layers.Dense( + units=q_hiddens[i], activation_fn=tf.nn.relu, - normalizer_fn=tf.contrib.layers.layer_norm) + normalizer_fn=tf.contrib.layers.layer_norm + )(state_out) else: - state_out = tf.layers.dense( - state_out, units=q_hiddens[i], activation=tf.nn.relu) + state_out = tf.keras.layers.Dense( + units=q_hiddens[i], activation=tf.nn.relu + )(state_out) if use_noisy: state_score = self._noisy_layer( "dueling_output", @@ -143,8 +143,9 @@ class DistributionalQModel(TFModelV2): sigma0, non_linear=False) else: - state_score = tf.layers.dense( - state_out, units=num_atoms, activation=None) + state_score = tf.keras.layers.Dense( + units=num_atoms, activation=None + )(state_out) return state_score if tf.executing_eagerly(): @@ -181,14 +182,12 @@ class DistributionalQModel(TFModelV2): name + "/state_value", reuse=tf.AUTO_REUSE): return build_state_score(model_out) - # TODO(ekl) we shouldn't need to use lambda layers here - q_out = tf.keras.layers.Lambda(build_action_value_in_scope)( - self.model_out) + q_out = build_action_value_in_scope(self.model_out) self.q_value_head = tf.keras.Model(self.model_out, q_out) self.register_variables(self.q_value_head.variables) if dueling: - state_out = tf.keras.layers.Lambda(build_state_score_in_scope)( + state_out = build_state_score_in_scope( self.model_out) self.state_value_head = tf.keras.Model(self.model_out, state_out) self.register_variables(self.state_value_head.variables) @@ -227,8 +226,6 @@ class DistributionalQModel(TFModelV2): distributions and \sigma are trainable variables which are expected to vanish along the training procedure """ - import tensorflow.contrib.layers as layers - in_size = int(action_in.shape[1]) epsilon_in = tf.random_normal(shape=[in_size]) @@ -259,7 +256,7 @@ class DistributionalQModel(TFModelV2): name=prefix + "_fc_w", shape=[in_size, out_size], dtype=tf.float32, - initializer=layers.xavier_initializer()) + initializer=tf.initializers.GlorotUniform()) b = tf.get_variable( name=prefix + "_fc_b", shape=[out_size],