DQN distributional model: Replace all legacy tf.contrib imports with tf.keras.layers.xyz or tf.initializers.xyz. (#6772)

- This fixes a test case in test_evaluators.py.
This commit is contained in:
Sven Mika
2020-01-13 21:48:16 -08:00
committed by Eric Liang
parent 3ea3b56eb1
commit 2bcf72e306
+23 -26
View File
@@ -67,18 +67,17 @@ class DistributionalQModel(TFModelV2):
action_out = self._noisy_layer(
"hidden_%d" % i, action_out, q_hiddens[i], sigma0)
elif parameter_noise:
import tensorflow.contrib.layers as layers
action_out = layers.fully_connected(
action_out,
num_outputs=q_hiddens[i],
action_out = tf.keras.layers.Dense(
units=q_hiddens[i],
activation_fn=tf.nn.relu,
normalizer_fn=layers.layer_norm)
normalizer_fn=tf.keras.layers.LayerNormalization
)(action_out)
else:
action_out = tf.layers.dense(
action_out,
action_out = tf.keras.layers.Dense(
units=q_hiddens[i],
activation=tf.nn.relu,
name="hidden_%d" % i)
name="hidden_%d" % i
)(action_out)
else:
# Avoid postprocessing the outputs. This enables custom models
# to be used for parametric action DQN.
@@ -91,10 +90,10 @@ class DistributionalQModel(TFModelV2):
sigma0,
non_linear=False)
elif q_hiddens:
action_scores = tf.layers.dense(
action_out,
action_scores = tf.keras.layers.Dense(
units=self.action_space.n * num_atoms,
activation=None)
activation=None
)(action_out)
else:
action_scores = model_out
if num_atoms > 1:
@@ -127,14 +126,15 @@ class DistributionalQModel(TFModelV2):
state_out, q_hiddens[i],
sigma0)
elif parameter_noise:
state_out = tf.contrib.layers.fully_connected(
state_out,
num_outputs=q_hiddens[i],
state_out = tf.keras.layers.Dense(
units=q_hiddens[i],
activation_fn=tf.nn.relu,
normalizer_fn=tf.contrib.layers.layer_norm)
normalizer_fn=tf.contrib.layers.layer_norm
)(state_out)
else:
state_out = tf.layers.dense(
state_out, units=q_hiddens[i], activation=tf.nn.relu)
state_out = tf.keras.layers.Dense(
units=q_hiddens[i], activation=tf.nn.relu
)(state_out)
if use_noisy:
state_score = self._noisy_layer(
"dueling_output",
@@ -143,8 +143,9 @@ class DistributionalQModel(TFModelV2):
sigma0,
non_linear=False)
else:
state_score = tf.layers.dense(
state_out, units=num_atoms, activation=None)
state_score = tf.keras.layers.Dense(
units=num_atoms, activation=None
)(state_out)
return state_score
if tf.executing_eagerly():
@@ -181,14 +182,12 @@ class DistributionalQModel(TFModelV2):
name + "/state_value", reuse=tf.AUTO_REUSE):
return build_state_score(model_out)
# TODO(ekl) we shouldn't need to use lambda layers here
q_out = tf.keras.layers.Lambda(build_action_value_in_scope)(
self.model_out)
q_out = build_action_value_in_scope(self.model_out)
self.q_value_head = tf.keras.Model(self.model_out, q_out)
self.register_variables(self.q_value_head.variables)
if dueling:
state_out = tf.keras.layers.Lambda(build_state_score_in_scope)(
state_out = build_state_score_in_scope(
self.model_out)
self.state_value_head = tf.keras.Model(self.model_out, state_out)
self.register_variables(self.state_value_head.variables)
@@ -227,8 +226,6 @@ class DistributionalQModel(TFModelV2):
distributions and \sigma are trainable variables which are expected to
vanish along the training procedure
"""
import tensorflow.contrib.layers as layers
in_size = int(action_in.shape[1])
epsilon_in = tf.random_normal(shape=[in_size])
@@ -259,7 +256,7 @@ class DistributionalQModel(TFModelV2):
name=prefix + "_fc_w",
shape=[in_size, out_size],
dtype=tf.float32,
initializer=layers.xavier_initializer())
initializer=tf.initializers.GlorotUniform())
b = tf.get_variable(
name=prefix + "_fc_b",
shape=[out_size],