diff --git a/python/ray/rllib/models/action_dist.py b/python/ray/rllib/models/action_dist.py
index d1dc8687f..26455add8 100644
--- a/python/ray/rllib/models/action_dist.py
+++ b/python/ray/rllib/models/action_dist.py
@@ -70,27 +70,27 @@ class DiagGaussian(ActionDistribution):
 
     def __init__(self, inputs):
         ActionDistribution.__init__(self, inputs)
-        mean, logstd = tf.split(inputs, 2, axis=1)
+        mean, log_std = tf.split(inputs, 2, axis=1)
         self.mean = mean
-        self.logstd = logstd
-        self.std = tf.exp(logstd)
+        self.log_std = log_std
+        self.std = tf.exp(log_std)
 
     def logp(self, x):
         return (-0.5 * tf.reduce_sum(tf.square((x - self.mean) / self.std),
                                      reduction_indices=[1]) -
                 0.5 * np.log(2.0 * np.pi) * tf.to_float(tf.shape(x)[1]) -
-                tf.reduce_sum(self.logstd, reduction_indices=[1]))
+                tf.reduce_sum(self.log_std, reduction_indices=[1]))
 
     def kl(self, other):
         assert isinstance(other, DiagGaussian)
-        return tf.reduce_sum(other.logstd - self.logstd +
+        return tf.reduce_sum(other.log_std - self.log_std +
                              (tf.square(self.std) +
                               tf.square(self.mean - other.mean)) /
                              (2.0 * tf.square(other.std)) - 0.5,
                              reduction_indices=[1])
 
     def entropy(self):
-        return tf.reduce_sum(self.logstd + .5 * np.log(2.0 * np.pi * np.e),
+        return tf.reduce_sum(self.log_std + .5 * np.log(2.0 * np.pi * np.e),
                              reduction_indices=[1])
 
     def sample(self):
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index effaa3326..5b942a55f 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -48,7 +48,7 @@ class ModelCatalog(object):
             "Unsupported args: {} {}".format(action_space, dist_type))
 
     @staticmethod
-    def get_model(inputs, num_outputs, options=None):
+    def get_model(inputs, num_outputs, options=dict()):
         """Returns a suitable model conforming to given input and output specs.
 
         Args:
@@ -60,9 +60,6 @@ class ModelCatalog(object):
             model (Model): Neural network model.
         """
 
-        if options is None:
-            options = {}
-
         obs_rank = len(inputs.get_shape()) - 1
 
         if obs_rank > 1:
@@ -71,7 +68,7 @@ class ModelCatalog(object):
         return FullyConnectedNetwork(inputs, num_outputs, options)
 
     @staticmethod
-    def ConvolutionalNetwork(inputs, num_outputs, options=None):
+    def ConvolutionalNetwork(inputs, num_outputs, options=dict()):
         return ConvolutionalNetwork(inputs, num_outputs, options)
 
     @staticmethod
diff --git a/python/ray/rllib/models/fcnet.py b/python/ray/rllib/models/fcnet.py
index 3faac9d38..1990158cb 100644
--- a/python/ray/rllib/models/fcnet.py
+++ b/python/ray/rllib/models/fcnet.py
@@ -19,17 +19,7 @@ def normc_initializer(std=1.0):
 
 
 class FullyConnectedNetwork(Model):
-    """Generic fully connected network.
-
-    Options to construct the network are passed to the _init function.
-    If options["free_logstd"] is True, the last half of the
-    output layer will be free variables that are not dependent on
-    inputs. This is often used if the output of the network is used
-    to parametrize a probability distribution. In this case, the
-    first half of the parameters can be interpreted as a location
-    parameter (like a mean) and the second half can be interpreted as
-    a scale parameter (like a standard deviation).
-    """
+    """Generic fully connected network."""
 
     def _init(self, inputs, num_outputs, options):
         hiddens = options.get("fcnet_hiddens", [256, 256])
@@ -40,9 +30,6 @@ class FullyConnectedNetwork(Model):
             activation = tf.nn.relu
         print("Constructing fcnet {} {}".format(hiddens, activation))
 
-        if options.get("free_logstd", False):
-            num_outputs = num_outputs // 2
-
         with tf.name_scope("fc_net"):
             i = 1
             last_layer = inputs
@@ -57,8 +44,4 @@ class FullyConnectedNetwork(Model):
                 last_layer, num_outputs,
                 weights_initializer=normc_initializer(0.01),
                 activation_fn=None, scope="fc_out")
-            if options.get("free_logstd", False):
-                logstd = tf.get_variable(name="logstd", shape=[num_outputs],
-                                         initializer=tf.zeros_initializer)
-                output = tf.concat([output, 0.0 * output + logstd], 1)
             return output, last_layer
diff --git a/python/ray/rllib/models/model.py b/python/ray/rllib/models/model.py
index 56cf5e866..a8cfedc33 100644
--- a/python/ray/rllib/models/model.py
+++ b/python/ray/rllib/models/model.py
@@ -2,6 +2,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import tensorflow as tf
+
 
 class Model(object):
     """Defines an abstract network model for use with RLlib.
@@ -13,6 +15,14 @@ class Model(object):
     The last layer of the network can also be retrieved if the algorithm
     needs to further post-processing (e.g. Actor and Critic networks in A3C).
 
+    If options["free_log_std"] is True, the last half of the
+    output layer will be free variables that are not dependent on
+    inputs. This is often used if the output of the network is used
+    to parametrize a probability distribution. In this case, the
+    first half of the parameters can be interpreted as a location
+    parameter (like a mean) and the second half can be interpreted as
+    a scale parameter (like a standard deviation).
+
     Attributes:
         inputs (Tensor): The input placeholder for this model.
         outputs (Tensor): The output vector of this model.
@@ -21,8 +31,16 @@ class Model(object):
 
     def __init__(self, inputs, num_outputs, options):
         self.inputs = inputs
+        if options.get("free_log_std", False):
+            assert num_outputs % 2 == 0
+            num_outputs = num_outputs // 2
         self.outputs, self.last_layer = self._init(
             inputs, num_outputs, options)
+        if options.get("free_log_std", False):
+            log_std = tf.get_variable(name="log_std", shape=[num_outputs],
+                                      initializer=tf.zeros_initializer)
+            self.outputs = tf.concat(
+                [self.outputs, 0.0 * self.outputs + log_std], 1)
 
     def _init(self):
         """Builds and returns the output and last layer of the network."""
diff --git a/python/ray/rllib/policy_gradient/policy_gradient.py b/python/ray/rllib/policy_gradient/policy_gradient.py
index dbee42167..46d721744 100644
--- a/python/ray/rllib/policy_gradient/policy_gradient.py
+++ b/python/ray/rllib/policy_gradient/policy_gradient.py
@@ -52,7 +52,7 @@ DEFAULT_CONFIG = {
     "clip_param": 0.3,
     # Target value for KL divergence
     "kl_target": 0.01,
-    "model": {"free_logstd": False},
+    "model": {"free_log_std": False},
     # Number of timesteps collected in each outer loop
     "timesteps_per_batch": 40000,
     # Each tasks performs rollouts until at least this
diff --git a/python/ray/rllib/test.sh b/python/ray/rllib/test.sh
index 162c53137..07c5dcbfc 100755
--- a/python/ray/rllib/test.sh
+++ b/python/ray/rllib/test.sh
@@ -6,9 +6,9 @@ python train.py --env CartPole-v1 --config '{"kl_coeff": 1.0, "num_sgd_iter": 20
 
 python train.py --env Walker2d-v1 --config '{"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64}' --alg PolicyGradient --upload-dir s3://bucketname/
 
-python train.py --env Humanoid-v1 --config '{"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64, "model": {"free_logstd": true}, "use_gae": false}' --alg PolicyGradient --upload-dir s3://bucketname/
+python train.py --env Humanoid-v1 --config '{"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64, "model": {"free_log_std": true}, "use_gae": false}' --alg PolicyGradient --upload-dir s3://bucketname/
 
-python train.py --env Humanoid-v1 --config '{"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64, "model": {"free_logstd": true}, "write_logs": false}' --alg PolicyGradient --upload-dir s3://bucketname/
+python train.py --env Humanoid-v1 --config '{"lambda": 0.95, "clip_param": 0.2, "kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "horizon": 5000, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64, "model": {"free_log_std": true}, "write_logs": false}' --alg PolicyGradient --upload-dir s3://bucketname/
 
 python train.py --env PongNoFrameskip-v0 --alg DQN --upload-dir s3://bucketname/
 python train.py --env PongDeterministic-v0 --alg A3C --upload-dir s3://bucketname/