Files
ray/python/ray/rllib/models/fcnet.py
T
Philipp Moritz df65e87fc7 [rllib] Tune ppo more on control tasks (#777)
* tune ppo on control tasks

* introduce free log_std

* fix

* flag for writing logs

* fixes

* fixes
2017-08-03 16:34:06 -07:00

61 lines
2.3 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
from ray.rllib.models.model import Model
def normc_initializer(std=1.0):
def _initializer(shape, dtype=None, partition_info=None):
out = np.random.randn(*shape).astype(np.float32)
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
return tf.constant(out)
return _initializer
class FullyConnectedNetwork(Model):
"""Generic fully connected network.
Options to construct the network are passed to the _init function.
If options["free_logstd"] is True, the last half of the
output layer will be free variables that are not dependent on
inputs. This is often used if the output of the network is used
to parametrize a probability distribution. In this case, the
first half of the parameters can be interpreted as a location
parameter (like a mean) and the second half can be interpreted as
a scale parameter (like a standard deviation).
"""
def _init(self, inputs, num_outputs, options):
hiddens = options.get("fcnet_hiddens", [256, 256])
activation = options.get("fcnet_activation", tf.nn.tanh)
print("Constructing fcnet {} {}".format(hiddens, activation))
if options.get("free_logstd", False):
num_outputs = num_outputs // 2
with tf.name_scope("fc_net"):
i = 1
last_layer = inputs
for size in hiddens:
last_layer = slim.fully_connected(
last_layer, size,
weights_initializer=normc_initializer(1.0),
activation_fn=activation,
scope="fc{}".format(i))
i += 1
output = slim.fully_connected(
last_layer, num_outputs,
weights_initializer=normc_initializer(0.01),
activation_fn=None, scope="fc_out")
if options.get("free_logstd", False):
logstd = tf.get_variable(name="logstd", shape=[num_outputs],
initializer=tf.zeros_initializer)
output = tf.concat([output, 0.0 * output + logstd], 1)
return output, last_layer