mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 21:12:15 +08:00
df65e87fc7
* tune ppo on control tasks * introduce free log_std * fix * flag for writing logs * fixes * fixes
61 lines
2.3 KiB
Python
61 lines
2.3 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import tensorflow as tf
|
|
import tensorflow.contrib.slim as slim
|
|
|
|
import numpy as np
|
|
|
|
from ray.rllib.models.model import Model
|
|
|
|
|
|
def normc_initializer(std=1.0):
    """Return an initializer that draws normalized Gaussian weights.

    The returned callable samples a standard-normal array of the requested
    shape, casts it to float32 and rescales it so that the L2 norm taken
    along axis 0 equals ``std``.

    Args:
        std: Target L2 norm (along axis 0) of the generated weights.

    Returns:
        A callable ``(shape, dtype=None, partition_info=None)`` producing a
        ``tf.constant`` holding the generated weights.
    """
    def _initializer(shape, dtype=None, partition_info=None):
        # dtype and partition_info are accepted for signature compatibility
        # with the caller but are not used: the result is always built from
        # a float32 NumPy array.
        weights = np.random.randn(*shape).astype(np.float32)
        axis0_norms = np.sqrt(
            np.square(weights).sum(axis=0, keepdims=True))
        # Scale in place so the array keeps its float32 dtype.
        np.multiply(weights, std / axis0_norms, out=weights)
        return tf.constant(weights)

    return _initializer
|
|
|
|
|
|
class FullyConnectedNetwork(Model):
    """Generic fully connected network.

    Options to construct the network are passed to the _init function.
    If options["free_logstd"] is True, the last half of the
    output layer will be free variables that are not dependent on
    inputs. This is often used if the output of the network is used
    to parametrize a probability distribution. In this case, the
    first half of the parameters can be interpreted as a location
    parameter (like a mean) and the second half can be interpreted as
    a scale parameter (like a standard deviation).
    """

    def _init(self, inputs, num_outputs, options):
        """Build the hidden layers and the output layer.

        Args:
            inputs: Tensor fed into the first fully connected layer.
                Presumably shaped (batch, features), since the outputs are
                concatenated along axis 1 -- confirm against the caller.
            num_outputs: Total width of the returned output tensor. Must be
                even when options["free_logstd"] is truthy, because the
                output is then built from two halves of equal size.
            options: Mapping of model options. Keys read here:
                "fcnet_hiddens" (list of hidden layer sizes, default
                [256, 256]), "fcnet_activation" (activation function,
                default tf.nn.tanh) and "free_logstd" (default False).

        Returns:
            A tuple (output, last_layer): the output tensor and the tensor
            produced by the last hidden layer (or ``inputs`` itself when
            there are no hidden layers).

        Raises:
            ValueError: If "free_logstd" is set and num_outputs is odd.
        """
        hiddens = options.get("fcnet_hiddens", [256, 256])
        activation = options.get("fcnet_activation", tf.nn.tanh)
        free_logstd = options.get("free_logstd", False)
        print("Constructing fcnet {} {}".format(hiddens, activation))

        if free_logstd:
            # Only the first half of the outputs comes from the network;
            # an odd width cannot be split evenly and would silently yield
            # an output one unit narrower than requested.
            if num_outputs % 2 != 0:
                raise ValueError(
                    "num_outputs must be even when free_logstd is set, "
                    "got {}".format(num_outputs))
            num_outputs = num_outputs // 2

        with tf.name_scope("fc_net"):
            last_layer = inputs
            # Hidden layers are scoped "fc1", "fc2", ... in order.
            for i, size in enumerate(hiddens, 1):
                last_layer = slim.fully_connected(
                    last_layer, size,
                    weights_initializer=normc_initializer(1.0),
                    activation_fn=activation,
                    scope="fc{}".format(i))
            # Small initial weights and no activation on the output layer.
            output = slim.fully_connected(
                last_layer, num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None, scope="fc_out")
            if free_logstd:
                logstd = tf.get_variable(name="logstd", shape=[num_outputs],
                                         initializer=tf.zeros_initializer)
                # 0.0 * output + logstd broadcasts the input-independent
                # logstd vector to the same shape as output before the two
                # halves are joined along axis 1.
                output = tf.concat([output, 0.0 * output + logstd], 1)
            return output, last_layer
|