mirror of
https://github.com/wassname/Run-Skeleton-Run.git
synced 2026-06-28 00:42:06 +08:00
96 lines
3.5 KiB
Python
96 lines
3.5 KiB
Python
import tensorflow as tf
|
|
import baselines.baselines_common.tf_util as U
|
|
from baselines.baselines_common.mpi_running_mean_std import RunningMeanStd
|
|
from baselines.baselines_common.distributions import make_pdtype, DiagGaussianPdType, BernoulliPdType
|
|
|
|
|
|
def mlp_block(x, name, num_hid_layers, hid_size, activation_fn=tf.nn.tanh):
    """Chain ``num_hid_layers`` rounds of ``U.dense`` + ``activation_fn``.

    All layers are created inside the variable scope ``name`` and are named
    ``fc1``, ``fc2``, ... in creation order.  Each ``U.dense`` call is given
    ``hid_size`` as its size argument and ``U.normc_initializer(1.0)`` as
    its weight initializer.

    Args:
        x: Input passed to the first ``U.dense`` call.
        name: Variable scope that wraps every layer of the block.
        num_hid_layers: How many dense/activation pairs to stack.
        hid_size: Size argument forwarded to each ``U.dense`` call.
        activation_fn: Callable applied to the output of every dense layer.

    Returns:
        The output of the last activation, or ``x`` itself when the loop
        body never runs (``num_hid_layers`` of 0).
    """
    hidden = x
    with tf.variable_scope(name_or_scope=name):
        # Layer names are 1-based: fc1 .. fcN.
        for layer_no in range(1, num_hid_layers + 1):
            pre_activation = U.dense(
                hidden,
                hid_size,
                name="fc%i" % layer_no,
                weight_init=U.normc_initializer(1.0))
            hidden = activation_fn(pre_activation)
    return hidden
|
|
|
|
|
|
def feature_net(x, name, num_hid_layers, hid_size, activation_fn=tf.nn.tanh):
    """Build a feature extractor: one ``mlp_block`` nested in scope ``name``.

    The block is created under the fixed sub-scope ``"mlp"``, so its
    variables end up under ``<name>/mlp/``.

    Args:
        x: Input handed to ``mlp_block``.
        name: Outer variable scope for this network.
        num_hid_layers: Forwarded to ``mlp_block``.
        hid_size: Forwarded to ``mlp_block``.
        activation_fn: Forwarded to ``mlp_block``.

    Returns:
        Whatever ``mlp_block`` returns for the given arguments.
    """
    with tf.variable_scope(name_or_scope=name):
        features = mlp_block(
            x,
            name="mlp",
            num_hid_layers=num_hid_layers,
            hid_size=hid_size,
            activation_fn=activation_fn)
    return features
|
|
|
|
|
|
class Actor(object):
    """Policy ("pol") and value ("vf") networks over one observation input.

    Construction builds, under the variable scope given by ``name``:
      * an ``"ob"`` placeholder and a ``RunningMeanStd`` observation filter,
      * a value head whose output is exposed as ``self.vpred``,
      * a policy head whose flat output parameterises ``self.pd``,
      * ``self._act``, a ``U.function`` mapping ``(stochastic, ob)`` to
        ``(action, value prediction)``.
    """

    def __init__(self, name, *args, **kwargs):
        """Build the graph inside scope ``name``; other args go to ``_init``."""
        with tf.variable_scope(name):
            self._init(*args, **kwargs)
            # Read the scope name while the scope is still open; it is the
            # filter used later by get_variables()/get_trainable_variables().
            self.scope = tf.get_variable_scope().name

    def _init(self, ob_space, ac_space, hid_size, num_hid_layers,
              gaussian_fixed_var=True, noise_type=None):
        """Create placeholders, both networks and the ``_act`` function.

        Args:
            ob_space: Observation space; only its ``shape`` is read here.
            ac_space: Action space; passed to ``make_pdtype``, or its
                ``shape[0]`` is used when ``noise_type == "gaussian"``.
            hid_size: Hidden size forwarded to both ``feature_net`` calls.
            num_hid_layers: Layer count forwarded to both ``feature_net``
                calls.
            gaussian_fixed_var: When true and the distribution type is
                ``DiagGaussianPdType``, the log-std is a standalone
                variable instead of an output of the policy head.
            noise_type: ``"gaussian"`` forces ``DiagGaussianPdType``; any
                other value defers to ``make_pdtype(ac_space)``.
        """
        # --- action distribution type ---
        if noise_type == "gaussian":
            pdtype = DiagGaussianPdType(ac_space.shape[0])
        else:
            pdtype = make_pdtype(ac_space)
        self.pdtype = pdtype

        # --- observation input, with a leading dimension of unknown size ---
        ob_shape = [None] + list(ob_space.shape)
        ob = U.get_placeholder(name="ob", dtype=tf.float32, shape=ob_shape)

        with tf.variable_scope("obfilter"):
            self.ob_rms = RunningMeanStd(shape=ob_space.shape)
        # Standardise with the running statistics, then clip to [-5, 5].
        standardized = (ob - self.ob_rms.mean) / self.ob_rms.std
        obz = tf.clip_by_value(standardized, -5.0, 5.0)

        # --- critic net (value network) ---
        vf_features = feature_net(
            obz,
            name="vf",
            num_hid_layers=num_hid_layers,
            hid_size=hid_size,
            activation_fn=tf.nn.tanh)
        vf_out = U.dense(
            vf_features, 1,
            name="vf_final", weight_init=U.normc_initializer(1.0))
        # Drop the trailing size-1 axis of the value head.
        self.vpred = vf_out[:, 0]

        # --- actor net (policy network) ---
        pol_features = feature_net(
            obz,
            name="pol",
            num_hid_layers=num_hid_layers,
            hid_size=hid_size,
            activation_fn=tf.nn.tanh)

        use_free_logstd = (
            gaussian_fixed_var and isinstance(self.pdtype, DiagGaussianPdType))
        if use_free_logstd:
            # Half of the flat parameter vector is the mean, half the log-std.
            half_size = pdtype.param_shape()[0] // 2
            mean = U.dense(
                pol_features, half_size,
                name="pol_final", weight_init=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd", shape=[1, half_size],
                initializer=tf.zeros_initializer())
            # `mean * 0.0 + logstd` yields a tensor shaped like `mean` that
            # carries the shared log-std values, so the two can be joined
            # along axis 1.
            tiled_logstd = mean * 0.0 + logstd
            pdparam = U.concatenate([mean, tiled_logstd], axis=1)
        else:
            pdparam = U.dense(
                pol_features, pdtype.param_shape()[0],
                name="pol_final", weight_init=U.normc_initializer(0.01))

        # pd - probability distribution
        self.pd = pdtype.pdfromflat(pdparam)

        # No state tensors are registered by this actor.
        self.state_in = []
        self.state_out = []

        # Boolean switch: sample from the distribution or take its mode.
        stochastic = tf.placeholder(dtype=tf.bool, shape=())
        sampled_ac = self.pd.sample()
        mode_ac = self.pd.mode()
        ac = U.switch(stochastic, sampled_ac, mode_ac)
        self._act = U.function([stochastic, ob], [ac, self.vpred])

    def act(self, stochastic, ob):
        """Run ``_act`` on a single observation.

        ``ob`` gets a leading axis of length 1 before the call, and the
        first element of each returned batch is handed back.

        Returns:
            Tuple ``(action, value_prediction)`` for this one observation.
        """
        batch_ac, batch_vpred = self._act(stochastic, ob[None])
        return batch_ac[0], batch_vpred[0]

    def get_variables(self):
        """Return the GLOBAL_VARIABLES collection filtered by ``self.scope``."""
        return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.scope)

    def get_trainable_variables(self):
        """Return the TRAINABLE_VARIABLES collection filtered by ``self.scope``."""
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)

    def get_initial_state(self):
        """Return the initial state, which is always an empty list here."""
        return []
|