mirror of
https://github.com/wassname/ray.git
synced 2026-06-29 07:23:55 +08:00
[rllib] Different Activation Support (#2311)
This commit is contained in:
committed by
Richard Liaw
parent
3cc27d2840
commit
cd63804768
@@ -14,10 +14,14 @@ OPTIMIZER_SHARED_CONFIGS = [
|
||||
|
||||
DEFAULT_CONFIG = {
|
||||
# === Model ===
|
||||
# Hidden layer sizes of the policy networks
|
||||
# Hidden layer sizes of the policy network
|
||||
"actor_hiddens": [64, 64],
|
||||
# Hidden layer sizes of the policy networks
|
||||
# Hidden layers activation of the policy network
|
||||
"actor_hidden_activation": "relu",
|
||||
# Hidden layer sizes of the critic network
|
||||
"critic_hiddens": [64, 64],
|
||||
# Hidden layers activation of the critic network
|
||||
"critic_hidden_activation": "relu",
|
||||
# N-step Q learning
|
||||
"n_step": 1,
|
||||
# Config options to pass to the model constructor
|
||||
|
||||
@@ -26,11 +26,13 @@ class PNetwork(object):
|
||||
"""Maps an observation (i.e., state) to an action where each entry takes
|
||||
a value in (0, 1) due to the sigmoid function."""
|
||||
|
||||
def __init__(self, model, dim_actions, hiddens=[64, 64]):
|
||||
def __init__(
|
||||
self, model, dim_actions, hiddens=[64, 64], activation="relu"):
|
||||
action_out = model.last_layer
|
||||
activation = tf.nn.__dict__[activation]
|
||||
for hidden in hiddens:
|
||||
action_out = layers.fully_connected(
|
||||
action_out, num_outputs=hidden, activation_fn=tf.nn.relu)
|
||||
action_out, num_outputs=hidden, activation_fn=activation)
|
||||
# Use sigmoid layer to bound values within (0, 1)
|
||||
# shape of action_scores is [batch_size, dim_actions]
|
||||
self.action_scores = layers.fully_connected(
|
||||
@@ -69,11 +71,14 @@ class ActionNetwork(object):
|
||||
|
||||
|
||||
class QNetwork(object):
|
||||
def __init__(self, model, action_inputs, hiddens=[64, 64]):
|
||||
def __init__(
|
||||
self, model, action_inputs,
|
||||
hiddens=[64, 64], activation="relu"):
|
||||
q_out = tf.concat([model.last_layer, action_inputs], axis=1)
|
||||
activation = tf.nn.__dict__[activation]
|
||||
for hidden in hiddens:
|
||||
q_out = layers.fully_connected(
|
||||
q_out, num_outputs=hidden, activation_fn=tf.nn.relu)
|
||||
q_out, num_outputs=hidden, activation_fn=activation)
|
||||
self.value = layers.fully_connected(
|
||||
q_out, num_outputs=1, activation_fn=None)
|
||||
|
||||
@@ -128,13 +133,15 @@ class DDPGPolicyGraph(TFPolicyGraph):
|
||||
return QNetwork(
|
||||
ModelCatalog.get_model(obs, 1, config["model"]),
|
||||
actions,
|
||||
config["critic_hiddens"]).value
|
||||
config["critic_hiddens"],
|
||||
config["critic_hidden_activation"]).value
|
||||
|
||||
def _build_p_network(obs):
|
||||
return PNetwork(
|
||||
ModelCatalog.get_model(obs, 1, config["model"]),
|
||||
dim_actions,
|
||||
config["actor_hiddens"]).action_scores
|
||||
config["actor_hiddens"],
|
||||
config["actor_hidden_activation"]).action_scores
|
||||
|
||||
def _build_action_network(p_values, stochastic, eps):
|
||||
return ActionNetwork(
|
||||
|
||||
@@ -24,10 +24,8 @@ MODEL_CONFIGS = [
|
||||
# === Built-in options ===
|
||||
"conv_filters", # Filter configuration
|
||||
"conv_activation", # Nonlinearity for built-in convnet
|
||||
|
||||
"fcnet_activation", # Nonlinearity for fully connected net (tanh, relu)
|
||||
"fcnet_hiddens", # Sizes of the hidden layers for fully connected net
|
||||
|
||||
"dim", # Dimension for ATARI
|
||||
"grayscale", # Converts ATARI frame to 1 Channel Grayscale image
|
||||
"zero_mean", # Changes frame to range from [-1, 1] if true
|
||||
|
||||
@@ -14,7 +14,6 @@ class FullyConnectedNetwork(Model):
|
||||
|
||||
def _build_layers(self, inputs, num_outputs, options):
|
||||
hiddens = options.get("fcnet_hiddens", [256, 256])
|
||||
|
||||
activation = get_activation_fn(options.get("fcnet_activation", "tanh"))
|
||||
|
||||
with tf.name_scope("fc_net"):
|
||||
|
||||
Reference in New Issue
Block a user