diff --git a/python/ray/rllib/ddpg/ddpg.py b/python/ray/rllib/ddpg/ddpg.py index abd9f3d81..9a93e57c1 100644 --- a/python/ray/rllib/ddpg/ddpg.py +++ b/python/ray/rllib/ddpg/ddpg.py @@ -14,10 +14,14 @@ OPTIMIZER_SHARED_CONFIGS = [ DEFAULT_CONFIG = { # === Model === - # Hidden layer sizes of the policy networks + # Hidden layer sizes of the policy network "actor_hiddens": [64, 64], - # Hidden layer sizes of the policy networks + # Hidden layers activation of the policy network + "actor_hidden_activation": "relu", + # Hidden layer sizes of the critic network "critic_hiddens": [64, 64], + # Hidden layers activation of the critic network + "critic_hidden_activation": "relu", # N-step Q learning "n_step": 1, # Config options to pass to the model constructor diff --git a/python/ray/rllib/ddpg/ddpg_policy_graph.py b/python/ray/rllib/ddpg/ddpg_policy_graph.py index 870c5bcec..34aa9682b 100644 --- a/python/ray/rllib/ddpg/ddpg_policy_graph.py +++ b/python/ray/rllib/ddpg/ddpg_policy_graph.py @@ -26,11 +26,13 @@ class PNetwork(object): """Maps an observations (i.e., state) to an action where each entry takes value from (0, 1) due to the sigmoid function.""" - def __init__(self, model, dim_actions, hiddens=[64, 64]): + def __init__( + self, model, dim_actions, hiddens=[64, 64], activation="relu"): action_out = model.last_layer + activation = tf.nn.__dict__[activation] for hidden in hiddens: action_out = layers.fully_connected( - action_out, num_outputs=hidden, activation_fn=tf.nn.relu) + action_out, num_outputs=hidden, activation_fn=activation) # Use sigmoid layer to bound values within (0, 1) # shape of action_scores is [batch_size, dim_actions] self.action_scores = layers.fully_connected( @@ -69,11 +71,14 @@ class ActionNetwork(object): class QNetwork(object): - def __init__(self, model, action_inputs, hiddens=[64, 64]): + def __init__( + self, model, action_inputs, + hiddens=[64, 64], activation="relu"): q_out = tf.concat([model.last_layer, action_inputs], axis=1) + activation = tf.nn.__dict__[activation] for hidden in hiddens: q_out = layers.fully_connected( - q_out, num_outputs=hidden, activation_fn=tf.nn.relu) + q_out, num_outputs=hidden, activation_fn=activation) self.value = layers.fully_connected( q_out, num_outputs=1, activation_fn=None) @@ -128,13 +133,15 @@ class DDPGPolicyGraph(TFPolicyGraph): return QNetwork( ModelCatalog.get_model(obs, 1, config["model"]), actions, - config["critic_hiddens"]).value + config["critic_hiddens"], + config["critic_hidden_activation"]).value def _build_p_network(obs): return PNetwork( ModelCatalog.get_model(obs, 1, config["model"]), dim_actions, - config["actor_hiddens"]).action_scores + config["actor_hiddens"], + config["actor_hidden_activation"]).action_scores def _build_action_network(p_values, stochastic, eps): return ActionNetwork( diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index 61b31f708..0da5dc2db 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -24,10 +24,8 @@ MODEL_CONFIGS = [ # === Built-in options === "conv_filters", # Filter configuration "conv_activation", # Nonlinearity for built-in convnet - "fcnet_activation", # Nonlinearity for fully connected net (tanh, relu) "fcnet_hiddens", # Number of hidden layers for fully connected net - "dim", # Dimension for ATARI "grayscale", # Converts ATARI frame to 1 Channel Grayscale image "zero_mean", # Changes frame to range from [-1, 1] if true diff --git a/python/ray/rllib/models/fcnet.py b/python/ray/rllib/models/fcnet.py index ce516f1c7..3f5bcabf6 100644 --- a/python/ray/rllib/models/fcnet.py +++ b/python/ray/rllib/models/fcnet.py @@ -14,7 +14,6 @@ class FullyConnectedNetwork(Model): def _build_layers(self, inputs, num_outputs, options): hiddens = options.get("fcnet_hiddens", [256, 256]) - activation = get_activation_fn(options.get("fcnet_activation", "tanh")) with tf.name_scope("fc_net"):