[rllib] Different Activation Support (#2311)

2026-06-29 07:23:55 +08:00 · 2018-06-29 04:41:04 +03:00
parent 3cc27d2840
commit cd63804768
4 changed files with 19 additions and 11 deletions
@@ -14,10 +14,14 @@ OPTIMIZER_SHARED_CONFIGS = [

 DEFAULT_CONFIG = {
    # === Model ===
-    # Hidden layer sizes of the policy networks
+    # Hidden layer sizes of the policy network
    "actor_hiddens": [64, 64],
-    # Hidden layer sizes of the policy networks
+    # Activation function for the hidden layers of the policy network
+    "actor_hidden_activation": "relu",
+    # Hidden layer sizes of the critic network
    "critic_hiddens": [64, 64],
+    # Activation function for the hidden layers of the critic network
+    "critic_hidden_activation": "relu",
    # N-step Q learning
    "n_step": 1,
    # Config options to pass to the model constructor
@@ -26,11 +26,13 @@ class PNetwork(object):
    """Maps an observations (i.e., state) to an action where each entry takes
    value from (0, 1) due to the sigmoid function."""

-    def __init__(self, model, dim_actions, hiddens=[64, 64]):
+    def __init__(
+            self, model, dim_actions, hiddens=[64, 64], activation="relu"):
        action_out = model.last_layer
+        activation = tf.nn.__dict__[activation]
        for hidden in hiddens:
            action_out = layers.fully_connected(
-                action_out, num_outputs=hidden, activation_fn=tf.nn.relu)
+                action_out, num_outputs=hidden, activation_fn=activation)
        # Use sigmoid layer to bound values within (0, 1)
        # shape of action_scores is [batch_size, dim_actions]
        self.action_scores = layers.fully_connected(
@@ -69,11 +71,14 @@ class ActionNetwork(object):


 class QNetwork(object):
-    def __init__(self, model, action_inputs, hiddens=[64, 64]):
+    def __init__(
+            self, model, action_inputs,
+            hiddens=[64, 64], activation="relu"):
        q_out = tf.concat([model.last_layer, action_inputs], axis=1)
+        activation = getattr(tf.nn, activation)  # name -> tf.nn function
        for hidden in hiddens:
            q_out = layers.fully_connected(
-                q_out, num_outputs=hidden, activation_fn=tf.nn.relu)
+                q_out, num_outputs=hidden, activation_fn=activation)
        self.value = layers.fully_connected(
            q_out, num_outputs=1, activation_fn=None)

@@ -128,13 +133,15 @@ class DDPGPolicyGraph(TFPolicyGraph):
            return QNetwork(
                ModelCatalog.get_model(obs, 1, config["model"]),
                actions,
-                config["critic_hiddens"]).value
+                config["critic_hiddens"],
+                config["critic_hidden_activation"]).value

        def _build_p_network(obs):
            return PNetwork(
                ModelCatalog.get_model(obs, 1, config["model"]),
                dim_actions,
-                config["actor_hiddens"]).action_scores
+                config["actor_hiddens"],
+                config["actor_hidden_activation"]).action_scores

        def _build_action_network(p_values, stochastic, eps):
            return ActionNetwork(
@@ -24,10 +24,8 @@ MODEL_CONFIGS = [
    # === Built-in options ===
    "conv_filters",  # Filter configuration
    "conv_activation",  # Nonlinearity for built-in convnet
-
    "fcnet_activation",  # Nonlinearity for fully connected net (tanh, relu)
    "fcnet_hiddens",  # Number of hidden layers for fully connected net
-
    "dim",  # Dimension for ATARI
    "grayscale",  # Converts ATARI frame to 1 Channel Grayscale image
    "zero_mean",  # Changes frame to range from [-1, 1] if true
@@ -14,7 +14,6 @@ class FullyConnectedNetwork(Model):

    def _build_layers(self, inputs, num_outputs, options):
        hiddens = options.get("fcnet_hiddens", [256, 256])
-
        activation = get_activation_fn(options.get("fcnet_activation", "tanh"))

        with tf.name_scope("fc_net"):