[rllib] Different Activation Support (#2311)

This commit is contained in:
Sergey Kolesnikov
2018-06-29 04:41:04 +03:00
committed by Richard Liaw
parent 3cc27d2840
commit cd63804768
4 changed files with 19 additions and 11 deletions
+6 -2
View File
@@ -14,10 +14,14 @@ OPTIMIZER_SHARED_CONFIGS = [
DEFAULT_CONFIG = {
# === Model ===
# Hidden layer sizes of the policy networks
# Hidden layer sizes of the policy network
"actor_hiddens": [64, 64],
# Hidden layer sizes of the policy networks
# Hidden layers activation of the policy network
"actor_hidden_activation": "relu",
# Hidden layer sizes of the critic network
"critic_hiddens": [64, 64],
# Hidden layers activation of the critic network
"critic_hidden_activation": "relu",
# N-step Q learning
"n_step": 1,
# Config options to pass to the model constructor
+13 -6
View File
@@ -26,11 +26,13 @@ class PNetwork(object):
"""Maps an observation (i.e., state) to an action where each entry takes
a value in (0, 1) due to the sigmoid function."""
def __init__(self, model, dim_actions, hiddens=[64, 64]):
def __init__(
self, model, dim_actions, hiddens=[64, 64], activation="relu"):
action_out = model.last_layer
activation = tf.nn.__dict__[activation]
for hidden in hiddens:
action_out = layers.fully_connected(
action_out, num_outputs=hidden, activation_fn=tf.nn.relu)
action_out, num_outputs=hidden, activation_fn=activation)
# Use sigmoid layer to bound values within (0, 1)
# shape of action_scores is [batch_size, dim_actions]
self.action_scores = layers.fully_connected(
@@ -69,11 +71,14 @@ class ActionNetwork(object):
class QNetwork(object):
def __init__(self, model, action_inputs, hiddens=[64, 64]):
def __init__(
self, model, action_inputs,
hiddens=[64, 64], activation="relu"):
q_out = tf.concat([model.last_layer, action_inputs], axis=1)
activation = tf.nn.__dict__[activation]
for hidden in hiddens:
q_out = layers.fully_connected(
q_out, num_outputs=hidden, activation_fn=tf.nn.relu)
q_out, num_outputs=hidden, activation_fn=activation)
self.value = layers.fully_connected(
q_out, num_outputs=1, activation_fn=None)
@@ -128,13 +133,15 @@ class DDPGPolicyGraph(TFPolicyGraph):
return QNetwork(
ModelCatalog.get_model(obs, 1, config["model"]),
actions,
config["critic_hiddens"]).value
config["critic_hiddens"],
config["critic_hidden_activation"]).value
def _build_p_network(obs):
return PNetwork(
ModelCatalog.get_model(obs, 1, config["model"]),
dim_actions,
config["actor_hiddens"]).action_scores
config["actor_hiddens"],
config["actor_hidden_activation"]).action_scores
def _build_action_network(p_values, stochastic, eps):
return ActionNetwork(
-2
View File
@@ -24,10 +24,8 @@ MODEL_CONFIGS = [
# === Built-in options ===
"conv_filters", # Filter configuration
"conv_activation", # Nonlinearity for built-in convnet
"fcnet_activation", # Nonlinearity for fully connected net (tanh, relu)
"fcnet_hiddens", # Hidden layer sizes for fully connected net
"dim", # Dimension for ATARI
"grayscale", # Converts ATARI frame to 1 Channel Grayscale image
"zero_mean", # Changes frame to range from [-1, 1] if true
-1
View File
@@ -14,7 +14,6 @@ class FullyConnectedNetwork(Model):
def _build_layers(self, inputs, num_outputs, options):
hiddens = options.get("fcnet_hiddens", [256, 256])
activation = get_activation_fn(options.get("fcnet_activation", "tanh"))
with tf.name_scope("fc_net"):