From 7b04ed059e774151d30e68a11dff08abc31859b3 Mon Sep 17 00:00:00 2001
From: Robert Nishihara <robertnishihara@gmail.com>
Date: Sun, 24 Feb 2019 14:26:46 -0800
Subject: [PATCH] Move TensorFlowVariables to ray.experimental.tf_utils.
 (#4145)

---
 doc/source/using-ray-with-tensorflow.rst      | 13 +--
 examples/hyperopt/objective.py                | 40 +++++---
 examples/lbfgs/driver.py                      | 10 +-
 examples/parameter_server/model.py            | 72 ++++++++------
 examples/resnet/resnet_model.py               | 99 ++++++++++++-------
 python/ray/experimental/__init__.py           |  8 +-
 python/ray/experimental/sgd/mnist_example.py  |  6 +-
 .../experimental/sgd/tfbench/test_model.py    |  6 +-
 .../experimental/{tfutils.py => tf_utils.py}  |  6 +-
 python/ray/rllib/agents/ars/policies.py       |  3 +-
 .../rllib/agents/ddpg/ddpg_policy_graph.py    |  7 +-
 python/ray/rllib/agents/es/policies.py        |  3 +-
 .../ray/rllib/evaluation/tf_policy_graph.py   |  3 +-
 python/ray/tests/test_tensorflow.py           | 19 ++--
 14 files changed, 181 insertions(+), 114 deletions(-)
 rename python/ray/experimental/{tfutils.py => tf_utils.py} (99%)

diff --git a/doc/source/using-ray-with-tensorflow.rst b/doc/source/using-ray-with-tensorflow.rst
index 8ea9a8b59..f3b91dcee 100644
--- a/doc/source/using-ray-with-tensorflow.rst
+++ b/doc/source/using-ray-with-tensorflow.rst
@@ -54,8 +54,8 @@ method.
 
 .. code-block:: python
 
-  import ray
-  variables = ray.experimental.TensorFlowVariables(loss, sess)
+  import ray.experimental.tf_utils
+  variables = ray.experimental.tf_utils.TensorFlowVariables(loss, sess)
 
 The ``TensorFlowVariables`` object provides methods for getting and setting the
 weights as well as collecting all of the variables in the model.
@@ -96,6 +96,7 @@ complex Python objects.
   import tensorflow as tf
   import numpy as np
   import ray
+  import ray.experimental.tf_utils
 
   ray.init()
 
@@ -123,7 +124,7 @@ complex Python objects.
           init = tf.global_variables_initializer()
           self.sess = tf.Session()
           # Additional code for setting and getting the weights
-          self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess)
+          self.variables = ray.experimental.tf_utils.TensorFlowVariables(self.loss, self.sess)
           # Return all of the data needed to use the network.
           self.sess.run(init)
 
@@ -254,7 +255,7 @@ For reference, the full code is below:
           init = tf.global_variables_initializer()
           self.sess = tf.Session()
           # Additional code for setting and getting the weights
-          self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess)
+          self.variables = ray.experimental.tf_utils.TensorFlowVariables(self.loss, self.sess)
           # Return all of the data needed to use the network.
           self.sess.run(init)
 
@@ -320,7 +321,7 @@ For reference, the full code is below:
       if iteration % 20 == 0:
           print("Iteration {}: weights are {}".format(iteration, weights))
 
-.. autoclass:: ray.experimental.TensorFlowVariables
+.. autoclass:: ray.experimental.tf_utils.TensorFlowVariables
    :members:
 
 Troubleshooting
@@ -346,7 +347,7 @@ class definiton ``Network`` with a ``TensorFlowVariables`` instance:
           sess = tf.Session()
           init = tf.global_variables_initializer()
           sess.run(init)
-          self.variables = ray.experimental.TensorFlowVariables(c, sess)
+          self.variables = ray.experimental.tf_utils.TensorFlowVariables(c, sess)
 
       def set_weights(self, weights):
           self.variables.set_weights(weights)
diff --git a/examples/hyperopt/objective.py b/examples/hyperopt/objective.py
index 8662b77e7..b531bd219 100644
--- a/examples/hyperopt/objective.py
+++ b/examples/hyperopt/objective.py
@@ -6,9 +6,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import ray
 import tensorflow as tf
 
+import ray
+import ray.experimental.tf_utils
+
 
 def get_batch(data, batch_index, batch_size):
     # This method currently drops data when num_data is not divisible by
@@ -34,8 +36,8 @@ def conv2d(x, W):
 
 
 def max_pool_2x2(x):
-    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
-                          padding="SAME")
+    return tf.nn.max_pool(
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
 
 
 def cnn_setup(x, y, keep_prob, lr, stddev):
@@ -59,8 +61,8 @@ def cnn_setup(x, y, keep_prob, lr, stddev):
     W_fc2 = weight([fc_hidden, 10], stddev)
     b_fc2 = bias([10])
     y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
-    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_conv),
-                                   reduction_indices=[1]))
+    cross_entropy = tf.reduce_mean(
+        -tf.reduce_sum(y * tf.log(y_conv), reduction_indices=[1]))
     correct_pred = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
     return (tf.train.AdamOptimizer(lr).minimize(cross_entropy),
             tf.reduce_mean(tf.cast(correct_pred, tf.float32)), cross_entropy)
@@ -69,8 +71,12 @@ def cnn_setup(x, y, keep_prob, lr, stddev):
 # Define a remote function that takes a set of hyperparameters as well as the
 # data, consructs and trains a network, and returns the validation accuracy.
 @ray.remote
-def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels,
-                                   validation_images, validation_labels,
+def train_cnn_and_compute_accuracy(params,
+                                   steps,
+                                   train_images,
+                                   train_labels,
+                                   validation_images,
+                                   validation_labels,
                                    weights=None):
     # Extract the hyperparameters from the params dictionary.
     learning_rate = params["learning_rate"]
@@ -90,7 +96,8 @@ def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels,
         with tf.Session() as sess:
             # Use the TensorFlowVariables utility. This is only necessary if we
             # want to set and get the weights.
-            variables = ray.experimental.TensorFlowVariables(loss, sess)
+            variables = ray.experimental.tf_utils.TensorFlowVariables(
+                loss, sess)
             # Initialize the network weights.
             sess.run(tf.global_variables_initializer())
             # If some network weights were passed in, set those.
@@ -102,12 +109,19 @@ def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels,
                 image_batch = get_batch(train_images, i, batch_size)
                 label_batch = get_batch(train_labels, i, batch_size)
                 # Do one step of training.
-                sess.run(train_step, feed_dict={x: image_batch, y: label_batch,
-                                                keep_prob: keep})
+                sess.run(
+                    train_step,
+                    feed_dict={
+                        x: image_batch,
+                        y: label_batch,
+                        keep_prob: keep
+                    })
             # Training is done, so compute the validation accuracy and the
             # current weights and return.
-            totalacc = accuracy.eval(feed_dict={x: validation_images,
-                                                y: validation_labels,
-                                                keep_prob: 1.0})
+            totalacc = accuracy.eval(feed_dict={
+                x: validation_images,
+                y: validation_labels,
+                keep_prob: 1.0
+            })
             new_weights = variables.get_weights()
     return float(totalacc), new_weights
diff --git a/examples/lbfgs/driver.py b/examples/lbfgs/driver.py
index 9bdced7c7..0dfd81883 100644
--- a/examples/lbfgs/driver.py
+++ b/examples/lbfgs/driver.py
@@ -2,14 +2,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import ray
 import numpy as np
-import scipy.optimize
-import tensorflow as tf
 import os
+import scipy.optimize
 
+import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
 
+import ray
+import ray.experimental.tf_utils
+
 
 class LinearModel(object):
     """Simple class for a one layer neural network.
@@ -55,7 +57,7 @@ class LinearModel(object):
         # In order to get and set the weights, we pass in the loss function to
         # Ray's TensorFlowVariables to automatically create methods to modify
         # the weights.
-        self.variables = ray.experimental.TensorFlowVariables(
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
             cross_entropy, self.sess)
 
     def loss(self, xs, ys):
diff --git a/examples/parameter_server/model.py b/examples/parameter_server/model.py
index d68cb4713..6387f1bc6 100644
--- a/examples/parameter_server/model.py
+++ b/examples/parameter_server/model.py
@@ -6,17 +6,20 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import ray
+import time
+
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
-import time
+
+import ray
+import ray.experimental.tf_utils
 
 
 def download_mnist_retry(seed=0, max_num_retries=20):
     for _ in range(max_num_retries):
         try:
-            return input_data.read_data_sets("MNIST_data", one_hot=True,
-                                             seed=seed)
+            return input_data.read_data_sets(
+                "MNIST_data", one_hot=True, seed=seed)
         except tf.errors.AlreadyExistsError:
             time.sleep(1)
     raise Exception("Failed to download MNIST.")
@@ -42,30 +45,29 @@ class SimpleCNN(object):
 
             with tf.name_scope('adam_optimizer'):
                 self.optimizer = tf.train.AdamOptimizer(learning_rate)
-                self.train_step = self.optimizer.minimize(
-                    self.cross_entropy)
+                self.train_step = self.optimizer.minimize(self.cross_entropy)
 
             with tf.name_scope('accuracy'):
-                correct_prediction = tf.equal(tf.argmax(self.y_conv, 1),
-                                              tf.argmax(self.y_, 1))
+                correct_prediction = tf.equal(
+                    tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1))
                 correct_prediction = tf.cast(correct_prediction, tf.float32)
             self.accuracy = tf.reduce_mean(correct_prediction)
 
-            self.sess = tf.Session(config=tf.ConfigProto(
-                intra_op_parallelism_threads=1,
-                inter_op_parallelism_threads=1))
+            self.sess = tf.Session(
+                config=tf.ConfigProto(
+                    intra_op_parallelism_threads=1,
+                    inter_op_parallelism_threads=1))
             self.sess.run(tf.global_variables_initializer())
 
             # Helper values.
 
-            self.variables = ray.experimental.TensorFlowVariables(
+            self.variables = ray.experimental.tf_utils.TensorFlowVariables(
                 self.cross_entropy, self.sess)
 
-            self.grads = self.optimizer.compute_gradients(
-                self.cross_entropy)
-            self.grads_placeholder = [
-                (tf.placeholder("float", shape=grad[1].get_shape()), grad[1])
-                for grad in self.grads]
+            self.grads = self.optimizer.compute_gradients(self.cross_entropy)
+            self.grads_placeholder = [(tf.placeholder(
+                "float", shape=grad[1].get_shape()), grad[1])
+                                      for grad in self.grads]
             self.apply_grads_placeholder = self.optimizer.apply_gradients(
                 self.grads_placeholder)
 
@@ -73,17 +75,24 @@ class SimpleCNN(object):
         # TODO(rkn): Computing the weights before and after the training step
         # and taking the diff is awful.
         weights = self.get_weights()[1]
-        self.sess.run(self.train_step, feed_dict={self.x: x,
-                                                  self.y_: y,
-                                                  self.keep_prob: 0.5})
+        self.sess.run(
+            self.train_step,
+            feed_dict={
+                self.x: x,
+                self.y_: y,
+                self.keep_prob: 0.5
+            })
         new_weights = self.get_weights()[1]
         return [x - y for x, y in zip(new_weights, weights)]
 
     def compute_gradients(self, x, y):
-        return self.sess.run([grad[0] for grad in self.grads],
-                             feed_dict={self.x: x,
-                                        self.y_: y,
-                                        self.keep_prob: 0.5})
+        return self.sess.run(
+            [grad[0] for grad in self.grads],
+            feed_dict={
+                self.x: x,
+                self.y_: y,
+                self.keep_prob: 0.5
+            })
 
     def apply_gradients(self, gradients):
         feed_dict = {}
@@ -92,10 +101,13 @@ class SimpleCNN(object):
         self.sess.run(self.apply_grads_placeholder, feed_dict=feed_dict)
 
     def compute_accuracy(self, x, y):
-        return self.sess.run(self.accuracy,
-                             feed_dict={self.x: x,
-                                        self.y_: y,
-                                        self.keep_prob: 1.0})
+        return self.sess.run(
+            self.accuracy,
+            feed_dict={
+                self.x: x,
+                self.y_: y,
+                self.keep_prob: 1.0
+            })
 
     def set_weights(self, variable_names, weights):
         self.variables.set_weights(dict(zip(variable_names, weights)))
@@ -175,8 +187,8 @@ def conv2d(x, W):
 
 def max_pool_2x2(x):
     """max_pool_2x2 downsamples a feature map by 2X."""
-    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
-                          strides=[1, 2, 2, 1], padding='SAME')
+    return tf.nn.max_pool(
+        x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
 
 
 def weight_variable(shape):
diff --git a/examples/resnet/resnet_model.py b/examples/resnet/resnet_model.py
index 1246f10fc..7fa6dde69 100644
--- a/examples/resnet/resnet_model.py
+++ b/examples/resnet/resnet_model.py
@@ -13,14 +13,17 @@ from __future__ import print_function
 
 from collections import namedtuple
 import numpy as np
-import ray
+
 import tensorflow as tf
 from tensorflow.python.training import moving_averages
 
-HParams = namedtuple('HParams',
-                     'batch_size, num_classes, min_lrn_rate, lrn_rate, '
-                     'num_residual_units, use_bottleneck, weight_decay_rate, '
-                     'relu_leakiness, optimizer, num_gpus')
+import ray
+import ray.experimental.tf_utils
+
+HParams = namedtuple(
+    'HParams', 'batch_size, num_classes, min_lrn_rate, lrn_rate, '
+    'num_residual_units, use_bottleneck, weight_decay_rate, '
+    'relu_leakiness, optimizer, num_gpus')
 
 
 class ResNet(object):
@@ -51,7 +54,8 @@ class ResNet(object):
             self._build_train_op()
         else:
             # Additional initialization for the test network.
-            self.variables = ray.experimental.TensorFlowVariables(self.cost)
+            self.variables = ray.experimental.tf_utils.TensorFlowVariables(
+                self.cost)
             self.summaries = tf.summary.merge_all()
 
     def _stride_arr(self, stride):
@@ -75,27 +79,24 @@ class ResNet(object):
             filters = [16, 16, 32, 64]
 
         with tf.variable_scope('unit_1_0'):
-            x = res_func(x, filters[0], filters[1],
-                         self._stride_arr(strides[0]),
-                         activate_before_residual[0])
+            x = res_func(x, filters[0], filters[1], self._stride_arr(
+                strides[0]), activate_before_residual[0])
         for i in range(1, self.hps.num_residual_units):
             with tf.variable_scope('unit_1_%d' % i):
                 x = res_func(x, filters[1], filters[1], self._stride_arr(1),
                              False)
 
         with tf.variable_scope('unit_2_0'):
-            x = res_func(x, filters[1], filters[2],
-                         self._stride_arr(strides[1]),
-                         activate_before_residual[1])
+            x = res_func(x, filters[1], filters[2], self._stride_arr(
+                strides[1]), activate_before_residual[1])
         for i in range(1, self.hps.num_residual_units):
             with tf.variable_scope('unit_2_%d' % i):
-                x = res_func(x, filters[2], filters[2],
-                             self._stride_arr(1), False)
+                x = res_func(x, filters[2], filters[2], self._stride_arr(1),
+                             False)
 
         with tf.variable_scope('unit_3_0'):
-            x = res_func(x, filters[2], filters[3],
-                         self._stride_arr(strides[2]),
-                         activate_before_residual[2])
+            x = res_func(x, filters[2], filters[3], self._stride_arr(
+                strides[2]), activate_before_residual[2])
         for i in range(1, self.hps.num_residual_units):
             with tf.variable_scope('unit_3_%d' % i):
                 x = res_func(x, filters[3], filters[3], self._stride_arr(1),
@@ -136,7 +137,8 @@ class ResNet(object):
         apply_op = optimizer.minimize(self.cost, global_step=self.global_step)
         train_ops = [apply_op] + self._extra_train_ops
         self.train_op = tf.group(*train_ops)
-        self.variables = ray.experimental.TensorFlowVariables(self.train_op)
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
+            self.train_op)
 
     def _batch_norm(self, name, x):
         """Batch normalization."""
@@ -144,49 +146,65 @@ class ResNet(object):
             params_shape = [x.get_shape()[-1]]
 
             beta = tf.get_variable(
-                'beta', params_shape, tf.float32,
+                'beta',
+                params_shape,
+                tf.float32,
                 initializer=tf.constant_initializer(0.0, tf.float32))
             gamma = tf.get_variable(
-                'gamma', params_shape, tf.float32,
+                'gamma',
+                params_shape,
+                tf.float32,
                 initializer=tf.constant_initializer(1.0, tf.float32))
 
             if self.mode == 'train':
                 mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments')
 
                 moving_mean = tf.get_variable(
-                    'moving_mean', params_shape, tf.float32,
+                    'moving_mean',
+                    params_shape,
+                    tf.float32,
                     initializer=tf.constant_initializer(0.0, tf.float32),
                     trainable=False)
                 moving_variance = tf.get_variable(
-                    'moving_variance', params_shape, tf.float32,
+                    'moving_variance',
+                    params_shape,
+                    tf.float32,
                     initializer=tf.constant_initializer(1.0, tf.float32),
                     trainable=False)
 
                 self._extra_train_ops.append(
-                    moving_averages.assign_moving_average(moving_mean, mean,
-                                                          0.9))
+                    moving_averages.assign_moving_average(
+                        moving_mean, mean, 0.9))
                 self._extra_train_ops.append(
-                    moving_averages.assign_moving_average(moving_variance,
-                                                          variance, 0.9))
+                    moving_averages.assign_moving_average(
+                        moving_variance, variance, 0.9))
             else:
                 mean = tf.get_variable(
-                    'moving_mean', params_shape, tf.float32,
+                    'moving_mean',
+                    params_shape,
+                    tf.float32,
                     initializer=tf.constant_initializer(0.0, tf.float32),
                     trainable=False)
                 variance = tf.get_variable(
-                    'moving_variance', params_shape, tf.float32,
+                    'moving_variance',
+                    params_shape,
+                    tf.float32,
                     initializer=tf.constant_initializer(1.0, tf.float32),
                     trainable=False)
                 tf.summary.histogram(mean.op.name, mean)
                 tf.summary.histogram(variance.op.name, variance)
             # elipson used to be 1e-5. Maybe 0.001 solves NaN problem in deeper
             # net.
-            y = tf.nn.batch_normalization(
-                x, mean, variance, beta, gamma, 0.001)
+            y = tf.nn.batch_normalization(x, mean, variance, beta, gamma,
+                                          0.001)
             y.set_shape(x.get_shape())
             return y
 
-    def _residual(self, x, in_filter, out_filter, stride,
+    def _residual(self,
+                  x,
+                  in_filter,
+                  out_filter,
+                  stride,
                   activate_before_residual=False):
         """Residual unit with 2 sub layers."""
         if activate_before_residual:
@@ -212,14 +230,18 @@ class ResNet(object):
             if in_filter != out_filter:
                 orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID')
                 orig_x = tf.pad(
-                    orig_x, [[0, 0], [0, 0], [0, 0],
-                             [(out_filter - in_filter) // 2,
-                             (out_filter - in_filter) // 2]])
+                    orig_x,
+                    [[0, 0], [0, 0], [0, 0], [(out_filter - in_filter) // 2,
+                                              (out_filter - in_filter) // 2]])
             x += orig_x
 
         return x
 
-    def _bottleneck_residual(self, x, in_filter, out_filter, stride,
+    def _bottleneck_residual(self,
+                             x,
+                             in_filter,
+                             out_filter,
+                             stride,
                              activate_before_residual=False):
         """Bottleneck residual unit with 3 sub layers."""
         if activate_before_residual:
@@ -271,7 +293,8 @@ class ResNet(object):
             n = filter_size * filter_size * out_filters
             kernel = tf.get_variable(
                 'DW', [filter_size, filter_size, in_filters, out_filters],
-                tf.float32, initializer=tf.random_normal_initializer(
+                tf.float32,
+                initializer=tf.random_normal_initializer(
                     stddev=np.sqrt(2.0 / n)))
             return tf.nn.conv2d(x, kernel, strides, padding='SAME')
 
@@ -285,8 +308,8 @@ class ResNet(object):
         w = tf.get_variable(
             'DW', [x.get_shape()[1], out_dim],
             initializer=tf.uniform_unit_scaling_initializer(factor=1.0))
-        b = tf.get_variable('biases', [out_dim],
-                            initializer=tf.constant_initializer())
+        b = tf.get_variable(
+            'biases', [out_dim], initializer=tf.constant_initializer())
         return tf.nn.xw_plus_b(x, w, b)
 
     def _global_avg_pool(self, x):
diff --git a/python/ray/experimental/__init__.py b/python/ray/experimental/__init__.py
index 31aea1758..425ff2d93 100644
--- a/python/ray/experimental/__init__.py
+++ b/python/ray/experimental/__init__.py
@@ -2,7 +2,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from .tfutils import TensorFlowVariables
 from .features import (
     flush_redis_unsafe, flush_task_and_object_metadata_unsafe,
     flush_finished_tasks_unsafe, flush_evicted_objects_unsafe,
@@ -12,6 +11,13 @@ from .gcs_flush_policy import (set_flushing_policy, GcsFlushPolicy,
 from .named_actors import get_actor, register_actor
 from .api import get, wait
 
+
+def TensorFlowVariables(*args, **kwargs):
+    raise DeprecationWarning(
+        "'ray.experimental.TensorFlowVariables' is deprecated. Instead, please"
+        " do 'from ray.experimental.tf_utils import TensorFlowVariables'.")
+
+
 __all__ = [
     "TensorFlowVariables", "flush_redis_unsafe",
     "flush_task_and_object_metadata_unsafe", "flush_finished_tasks_unsafe",
diff --git a/python/ray/experimental/sgd/mnist_example.py b/python/ray/experimental/sgd/mnist_example.py
index 7c0ff5bd2..836126a9c 100755
--- a/python/ray/experimental/sgd/mnist_example.py
+++ b/python/ray/experimental/sgd/mnist_example.py
@@ -24,7 +24,7 @@ from ray.tune import run_experiments
 from ray.tune.examples.tune_mnist_ray import deepnn
 from ray.experimental.sgd.model import Model
 from ray.experimental.sgd.sgd import DistributedSGD
-from ray.experimental.tfutils import TensorFlowVariables
+import ray.experimental.tf_utils
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--redis-address", default=None, type=str)
@@ -67,8 +67,8 @@ class MNISTModel(Model):
             tf.nn.softmax_cross_entropy_with_logits(
                 labels=self.y_, logits=y_conv))
         self.optimizer = tf.train.AdamOptimizer(1e-4)
-        self.variables = TensorFlowVariables(self.loss,
-                                             tf.get_default_session())
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
+            self.loss, tf.get_default_session())
 
         # For evaluating test accuracy
         correct_prediction = tf.equal(
diff --git a/python/ray/experimental/sgd/tfbench/test_model.py b/python/ray/experimental/sgd/tfbench/test_model.py
index b0c2fde99..0fe081607 100644
--- a/python/ray/experimental/sgd/tfbench/test_model.py
+++ b/python/ray/experimental/sgd/tfbench/test_model.py
@@ -6,7 +6,7 @@ import tensorflow as tf
 
 from tfbench import model_config
 from ray.experimental.sgd.model import Model
-from ray.experimental.tfutils import TensorFlowVariables
+import ray.experimental.tf_utils
 
 
 class MockDataset():
@@ -47,8 +47,8 @@ class TFBenchModel(Model):
         self.loss = tf.reduce_mean(loss, name='xentropy-loss')
         self.optimizer = tf.train.GradientDescentOptimizer(1e-6)
 
-        self.variables = TensorFlowVariables(self.loss,
-                                             tf.get_default_session())
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
+            self.loss, tf.get_default_session())
 
     def get_loss(self):
         return self.loss
diff --git a/python/ray/experimental/tfutils.py b/python/ray/experimental/tf_utils.py
similarity index 99%
rename from python/ray/experimental/tfutils.py
rename to python/ray/experimental/tf_utils.py
index e33b33cad..bb424134e 100644
--- a/python/ray/experimental/tfutils.py
+++ b/python/ray/experimental/tf_utils.py
@@ -1,8 +1,11 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import numpy as np
+
 from collections import deque, OrderedDict
+import numpy as np
+
+import tensorflow as tf
 
 
 def unflatten(vector, shapes):
@@ -45,7 +48,6 @@ class TensorFlowVariables(object):
             input_variables (List[tf.Variables]): Variables to include in the
                 list.
         """
-        import tensorflow as tf
         self.sess = sess
         if not isinstance(output, (list, tuple)):
             output = [output]
diff --git a/python/ray/rllib/agents/ars/policies.py b/python/ray/rllib/agents/ars/policies.py
index 27f664655..7c4defd69 100644
--- a/python/ray/rllib/agents/ars/policies.py
+++ b/python/ray/rllib/agents/ars/policies.py
@@ -10,6 +10,7 @@ import numpy as np
 import tensorflow as tf
 
 import ray
+import ray.experimental.tf_utils
 from ray.rllib.evaluation.sampler import _unbatch_tuple_actions
 from ray.rllib.utils.filter import get_filter
 from ray.rllib.models import ModelCatalog
@@ -81,7 +82,7 @@ class GenericPolicy(object):
         dist = dist_class(model.outputs)
         self.sampler = dist.sample()
 
-        self.variables = ray.experimental.TensorFlowVariables(
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
             model.outputs, self.sess)
 
         self.num_params = sum(
diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
index 028a2e266..5f3e7dd5d 100644
--- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
+++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py
@@ -8,8 +8,9 @@ import tensorflow as tf
 import tensorflow.contrib.layers as layers
 
 import ray
-from ray.rllib.agents.dqn.dqn_policy_graph import _huber_loss, \
-    _minimize_and_clip, _scope_vars, _postprocess_dqn
+import ray.experimental.tf_utils
+from ray.rllib.agents.dqn.dqn_policy_graph import (
+    _huber_loss, _minimize_and_clip, _scope_vars, _postprocess_dqn)
 from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.annotations import override
 from ray.rllib.utils.error import UnsupportedSpaceException
@@ -387,7 +388,7 @@ class DDPGPolicyGraph(TFPolicyGraph):
 
         # Note that this encompasses both the policy and Q-value networks and
         # their corresponding target networks
-        self.variables = ray.experimental.TensorFlowVariables(
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
             tf.group(q_tp0, q_tp1), self.sess)
 
         # Hard initial update
diff --git a/python/ray/rllib/agents/es/policies.py b/python/ray/rllib/agents/es/policies.py
index cf2da630e..61f748ce0 100644
--- a/python/ray/rllib/agents/es/policies.py
+++ b/python/ray/rllib/agents/es/policies.py
@@ -10,6 +10,7 @@ import numpy as np
 import tensorflow as tf
 
 import ray
+import ray.experimental.tf_utils
 from ray.rllib.evaluation.sampler import _unbatch_tuple_actions
 from ray.rllib.models import ModelCatalog
 from ray.rllib.utils.filter import get_filter
@@ -59,7 +60,7 @@ class GenericPolicy(object):
         dist = dist_class(model.outputs)
         self.sampler = dist.sample()
 
-        self.variables = ray.experimental.TensorFlowVariables(
+        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
             model.outputs, self.sess)
 
         self.num_params = sum(
diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/python/ray/rllib/evaluation/tf_policy_graph.py
index 3271b49e6..232b869ec 100644
--- a/python/ray/rllib/evaluation/tf_policy_graph.py
+++ b/python/ray/rllib/evaluation/tf_policy_graph.py
@@ -9,6 +9,7 @@ import tensorflow as tf
 import numpy as np
 
 import ray
+import ray.experimental.tf_utils
 from ray.rllib.evaluation.policy_graph import PolicyGraph
 from ray.rllib.models.lstm import chop_into_sequences
 from ray.rllib.utils.annotations import override, DeveloperAPI
@@ -120,7 +121,7 @@ class TFPolicyGraph(PolicyGraph):
                                 for (g, v) in self.gradients(self._optimizer)
                                 if g is not None]
         self._grads = [g for (g, v) in self._grads_and_vars]
-        self._variables = ray.experimental.TensorFlowVariables(
+        self._variables = ray.experimental.tf_utils.TensorFlowVariables(
             self._loss, self._sess)
 
         # gather update ops for any batch norm layers
diff --git a/python/ray/tests/test_tensorflow.py b/python/ray/tests/test_tensorflow.py
index f37d81b79..258ca0124 100644
--- a/python/ray/tests/test_tensorflow.py
+++ b/python/ray/tests/test_tensorflow.py
@@ -7,6 +7,7 @@ import pytest
 import tensorflow as tf
 
 import ray
+import ray.experimental.tf_utils
 
 
 def make_linear_network(w_name=None, b_name=None):
@@ -31,7 +32,7 @@ class LossActor(object):
             loss, init, _, _ = make_linear_network()
             sess = tf.Session()
             # Additional code for setting and getting the weights.
-            weights = ray.experimental.TensorFlowVariables(
+            weights = ray.experimental.tf_utils.TensorFlowVariables(
                 loss if use_loss else None, sess, input_variables=var)
         # Return all of the data needed to use the network.
         self.values = [weights, init, sess]
@@ -53,7 +54,8 @@ class NetActor(object):
             loss, init, _, _ = make_linear_network()
             sess = tf.Session()
             # Additional code for setting and getting the weights.
-            variables = ray.experimental.TensorFlowVariables(loss, sess)
+            variables = ray.experimental.tf_utils.TensorFlowVariables(
+                loss, sess)
         # Return all of the data needed to use the network.
         self.values = [variables, init, sess]
         sess.run(init)
@@ -73,7 +75,8 @@ class TrainActor(object):
         with tf.Graph().as_default():
             loss, init, x_data, y_data = make_linear_network()
             sess = tf.Session()
-            variables = ray.experimental.TensorFlowVariables(loss, sess)
+            variables = ray.experimental.tf_utils.TensorFlowVariables(
+                loss, sess)
             optimizer = tf.train.GradientDescentOptimizer(0.9)
             grads = optimizer.compute_gradients(loss)
             train = optimizer.apply_gradients(grads)
@@ -107,7 +110,7 @@ def test_tensorflow_variables(ray_start_regular):
     loss, init, _, _ = make_linear_network()
     sess.run(init)
 
-    variables = ray.experimental.TensorFlowVariables(loss, sess)
+    variables = ray.experimental.tf_utils.TensorFlowVariables(loss, sess)
     weights = variables.get_weights()
 
     for (name, val) in weights.items():
@@ -119,7 +122,7 @@ def test_tensorflow_variables(ray_start_regular):
     loss2, init2, _, _ = make_linear_network("w", "b")
     sess.run(init2)
 
-    variables2 = ray.experimental.TensorFlowVariables(loss2, sess)
+    variables2 = ray.experimental.tf_utils.TensorFlowVariables(loss2, sess)
     weights2 = variables2.get_weights()
 
     for (name, val) in weights2.items():
@@ -131,7 +134,7 @@ def test_tensorflow_variables(ray_start_regular):
     variables2.set_flat(flat_weights)
     assert_almost_equal(flat_weights, variables2.get_flat())
 
-    variables3 = ray.experimental.TensorFlowVariables([loss2])
+    variables3 = ray.experimental.tf_utils.TensorFlowVariables([loss2])
     assert variables3.sess is None
     sess = tf.Session()
     variables3.set_session(sess)
@@ -205,7 +208,7 @@ def test_network_driver_worker_independent(ray_start_regular):
     # Create a network on the driver locally.
     sess1 = tf.Session()
     loss1, init1, _, _ = make_linear_network()
-    ray.experimental.TensorFlowVariables(loss1, sess1)
+    ray.experimental.tf_utils.TensorFlowVariables(loss1, sess1)
     sess1.run(init1)
 
     net2 = ray.remote(NetActor).remote()
@@ -221,7 +224,7 @@ def test_variables_control_dependencies(ray_start_regular):
     sess = tf.Session()
     loss, init, _, _ = make_linear_network()
     minimizer = tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss)
-    net_vars = ray.experimental.TensorFlowVariables(minimizer, sess)
+    net_vars = ray.experimental.tf_utils.TensorFlowVariables(minimizer, sess)
     sess.run(init)
 
     # Tests if all variables are properly retrieved, 2 variables and 2