From 7b04ed059e774151d30e68a11dff08abc31859b3 Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Sun, 24 Feb 2019 14:26:46 -0800 Subject: [PATCH] Move TensorFlowVariables to ray.experimental.tf_utils. (#4145) --- doc/source/using-ray-with-tensorflow.rst | 13 +-- examples/hyperopt/objective.py | 40 +++++--- examples/lbfgs/driver.py | 10 +- examples/parameter_server/model.py | 72 ++++++++------ examples/resnet/resnet_model.py | 99 ++++++++++++------- python/ray/experimental/__init__.py | 8 +- python/ray/experimental/sgd/mnist_example.py | 6 +- .../experimental/sgd/tfbench/test_model.py | 6 +- .../experimental/{tfutils.py => tf_utils.py} | 6 +- python/ray/rllib/agents/ars/policies.py | 3 +- .../rllib/agents/ddpg/ddpg_policy_graph.py | 7 +- python/ray/rllib/agents/es/policies.py | 3 +- .../ray/rllib/evaluation/tf_policy_graph.py | 3 +- python/ray/tests/test_tensorflow.py | 19 ++-- 14 files changed, 181 insertions(+), 114 deletions(-) rename python/ray/experimental/{tfutils.py => tf_utils.py} (99%) diff --git a/doc/source/using-ray-with-tensorflow.rst b/doc/source/using-ray-with-tensorflow.rst index 8ea9a8b59..f3b91dcee 100644 --- a/doc/source/using-ray-with-tensorflow.rst +++ b/doc/source/using-ray-with-tensorflow.rst @@ -54,8 +54,8 @@ method. .. code-block:: python - import ray - variables = ray.experimental.TensorFlowVariables(loss, sess) + import ray.experimental.tf_utils + variables = ray.experimental.tf_utils.TensorFlowVariables(loss, sess) The ``TensorFlowVariables`` object provides methods for getting and setting the weights as well as collecting all of the variables in the model. @@ -96,6 +96,7 @@ complex Python objects. import tensorflow as tf import numpy as np import ray + import ray.experimental.tf_utils ray.init() @@ -123,7 +124,7 @@ complex Python objects. 
init = tf.global_variables_initializer() self.sess = tf.Session() # Additional code for setting and getting the weights - self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess) + self.variables = ray.experimental.tf_utils.TensorFlowVariables(self.loss, self.sess) # Return all of the data needed to use the network. self.sess.run(init) @@ -254,7 +255,7 @@ For reference, the full code is below: init = tf.global_variables_initializer() self.sess = tf.Session() # Additional code for setting and getting the weights - self.variables = ray.experimental.TensorFlowVariables(self.loss, self.sess) + self.variables = ray.experimental.tf_utils.TensorFlowVariables(self.loss, self.sess) # Return all of the data needed to use the network. self.sess.run(init) @@ -320,7 +321,7 @@ For reference, the full code is below: if iteration % 20 == 0: print("Iteration {}: weights are {}".format(iteration, weights)) -.. autoclass:: ray.experimental.TensorFlowVariables +.. autoclass:: ray.experimental.tf_utils.TensorFlowVariables :members: Troubleshooting @@ -346,7 +347,7 @@ class definiton ``Network`` with a ``TensorFlowVariables`` instance: sess = tf.Session() init = tf.global_variables_initializer() sess.run(init) - self.variables = ray.experimental.TensorFlowVariables(c, sess) + self.variables = ray.experimental.tf_utils.TensorFlowVariables(c, sess) def set_weights(self, weights): self.variables.set_weights(weights) diff --git a/examples/hyperopt/objective.py b/examples/hyperopt/objective.py index 8662b77e7..b531bd219 100644 --- a/examples/hyperopt/objective.py +++ b/examples/hyperopt/objective.py @@ -6,9 +6,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import ray import tensorflow as tf +import ray +import ray.experimental.tf_utils + def get_batch(data, batch_index, batch_size): # This method currently drops data when num_data is not divisible by @@ -34,8 +36,8 @@ def conv2d(x, W): def 
max_pool_2x2(x): - return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding="SAME") + return tf.nn.max_pool( + x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") def cnn_setup(x, y, keep_prob, lr, stddev): @@ -59,8 +61,8 @@ def cnn_setup(x, y, keep_prob, lr, stddev): W_fc2 = weight([fc_hidden, 10], stddev) b_fc2 = bias([10]) y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2) - cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_conv), - reduction_indices=[1])) + cross_entropy = tf.reduce_mean( + -tf.reduce_sum(y * tf.log(y_conv), reduction_indices=[1])) correct_pred = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1)) return (tf.train.AdamOptimizer(lr).minimize(cross_entropy), tf.reduce_mean(tf.cast(correct_pred, tf.float32)), cross_entropy) @@ -69,8 +71,12 @@ def cnn_setup(x, y, keep_prob, lr, stddev): # Define a remote function that takes a set of hyperparameters as well as the # data, consructs and trains a network, and returns the validation accuracy. @ray.remote -def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels, - validation_images, validation_labels, +def train_cnn_and_compute_accuracy(params, + steps, + train_images, + train_labels, + validation_images, + validation_labels, weights=None): # Extract the hyperparameters from the params dictionary. learning_rate = params["learning_rate"] @@ -90,7 +96,8 @@ def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels, with tf.Session() as sess: # Use the TensorFlowVariables utility. This is only necessary if we # want to set and get the weights. - variables = ray.experimental.TensorFlowVariables(loss, sess) + variables = ray.experimental.tf_utils.TensorFlowVariables( + loss, sess) # Initialize the network weights. sess.run(tf.global_variables_initializer()) # If some network weights were passed in, set those. 
@@ -102,12 +109,19 @@ def train_cnn_and_compute_accuracy(params, steps, train_images, train_labels, image_batch = get_batch(train_images, i, batch_size) label_batch = get_batch(train_labels, i, batch_size) # Do one step of training. - sess.run(train_step, feed_dict={x: image_batch, y: label_batch, - keep_prob: keep}) + sess.run( + train_step, + feed_dict={ + x: image_batch, + y: label_batch, + keep_prob: keep + }) # Training is done, so compute the validation accuracy and the # current weights and return. - totalacc = accuracy.eval(feed_dict={x: validation_images, - y: validation_labels, - keep_prob: 1.0}) + totalacc = accuracy.eval(feed_dict={ + x: validation_images, + y: validation_labels, + keep_prob: 1.0 + }) new_weights = variables.get_weights() return float(totalacc), new_weights diff --git a/examples/lbfgs/driver.py b/examples/lbfgs/driver.py index 9bdced7c7..0dfd81883 100644 --- a/examples/lbfgs/driver.py +++ b/examples/lbfgs/driver.py @@ -2,14 +2,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import ray import numpy as np -import scipy.optimize -import tensorflow as tf import os +import scipy.optimize +import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data +import ray +import ray.experimental.tf_utils + class LinearModel(object): """Simple class for a one layer neural network. @@ -55,7 +57,7 @@ class LinearModel(object): # In order to get and set the weights, we pass in the loss function to # Ray's TensorFlowVariables to automatically create methods to modify # the weights. 
- self.variables = ray.experimental.TensorFlowVariables( + self.variables = ray.experimental.tf_utils.TensorFlowVariables( cross_entropy, self.sess) def loss(self, xs, ys): diff --git a/examples/parameter_server/model.py b/examples/parameter_server/model.py index d68cb4713..6387f1bc6 100644 --- a/examples/parameter_server/model.py +++ b/examples/parameter_server/model.py @@ -6,17 +6,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import ray +import time + import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data -import time + +import ray +import ray.experimental.tf_utils def download_mnist_retry(seed=0, max_num_retries=20): for _ in range(max_num_retries): try: - return input_data.read_data_sets("MNIST_data", one_hot=True, - seed=seed) + return input_data.read_data_sets( + "MNIST_data", one_hot=True, seed=seed) except tf.errors.AlreadyExistsError: time.sleep(1) raise Exception("Failed to download MNIST.") @@ -42,30 +45,29 @@ class SimpleCNN(object): with tf.name_scope('adam_optimizer'): self.optimizer = tf.train.AdamOptimizer(learning_rate) - self.train_step = self.optimizer.minimize( - self.cross_entropy) + self.train_step = self.optimizer.minimize(self.cross_entropy) with tf.name_scope('accuracy'): - correct_prediction = tf.equal(tf.argmax(self.y_conv, 1), - tf.argmax(self.y_, 1)) + correct_prediction = tf.equal( + tf.argmax(self.y_conv, 1), tf.argmax(self.y_, 1)) correct_prediction = tf.cast(correct_prediction, tf.float32) self.accuracy = tf.reduce_mean(correct_prediction) - self.sess = tf.Session(config=tf.ConfigProto( - intra_op_parallelism_threads=1, - inter_op_parallelism_threads=1)) + self.sess = tf.Session( + config=tf.ConfigProto( + intra_op_parallelism_threads=1, + inter_op_parallelism_threads=1)) self.sess.run(tf.global_variables_initializer()) # Helper values. 
- self.variables = ray.experimental.TensorFlowVariables( + self.variables = ray.experimental.tf_utils.TensorFlowVariables( self.cross_entropy, self.sess) - self.grads = self.optimizer.compute_gradients( - self.cross_entropy) - self.grads_placeholder = [ - (tf.placeholder("float", shape=grad[1].get_shape()), grad[1]) - for grad in self.grads] + self.grads = self.optimizer.compute_gradients(self.cross_entropy) + self.grads_placeholder = [(tf.placeholder( + "float", shape=grad[1].get_shape()), grad[1]) + for grad in self.grads] self.apply_grads_placeholder = self.optimizer.apply_gradients( self.grads_placeholder) @@ -73,17 +75,24 @@ class SimpleCNN(object): # TODO(rkn): Computing the weights before and after the training step # and taking the diff is awful. weights = self.get_weights()[1] - self.sess.run(self.train_step, feed_dict={self.x: x, - self.y_: y, - self.keep_prob: 0.5}) + self.sess.run( + self.train_step, + feed_dict={ + self.x: x, + self.y_: y, + self.keep_prob: 0.5 + }) new_weights = self.get_weights()[1] return [x - y for x, y in zip(new_weights, weights)] def compute_gradients(self, x, y): - return self.sess.run([grad[0] for grad in self.grads], - feed_dict={self.x: x, - self.y_: y, - self.keep_prob: 0.5}) + return self.sess.run( + [grad[0] for grad in self.grads], + feed_dict={ + self.x: x, + self.y_: y, + self.keep_prob: 0.5 + }) def apply_gradients(self, gradients): feed_dict = {} @@ -92,10 +101,13 @@ class SimpleCNN(object): self.sess.run(self.apply_grads_placeholder, feed_dict=feed_dict) def compute_accuracy(self, x, y): - return self.sess.run(self.accuracy, - feed_dict={self.x: x, - self.y_: y, - self.keep_prob: 1.0}) + return self.sess.run( + self.accuracy, + feed_dict={ + self.x: x, + self.y_: y, + self.keep_prob: 1.0 + }) def set_weights(self, variable_names, weights): self.variables.set_weights(dict(zip(variable_names, weights))) @@ -175,8 +187,8 @@ def conv2d(x, W): def max_pool_2x2(x): """max_pool_2x2 downsamples a feature map by 2X.""" - 
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], - strides=[1, 2, 2, 1], padding='SAME') + return tf.nn.max_pool( + x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') def weight_variable(shape): diff --git a/examples/resnet/resnet_model.py b/examples/resnet/resnet_model.py index 1246f10fc..7fa6dde69 100644 --- a/examples/resnet/resnet_model.py +++ b/examples/resnet/resnet_model.py @@ -13,14 +13,17 @@ from __future__ import print_function from collections import namedtuple import numpy as np -import ray + import tensorflow as tf from tensorflow.python.training import moving_averages -HParams = namedtuple('HParams', - 'batch_size, num_classes, min_lrn_rate, lrn_rate, ' - 'num_residual_units, use_bottleneck, weight_decay_rate, ' - 'relu_leakiness, optimizer, num_gpus') +import ray +import ray.experimental.tf_utils + +HParams = namedtuple( + 'HParams', 'batch_size, num_classes, min_lrn_rate, lrn_rate, ' + 'num_residual_units, use_bottleneck, weight_decay_rate, ' + 'relu_leakiness, optimizer, num_gpus') class ResNet(object): @@ -51,7 +54,8 @@ class ResNet(object): self._build_train_op() else: # Additional initialization for the test network. 
- self.variables = ray.experimental.TensorFlowVariables(self.cost) + self.variables = ray.experimental.tf_utils.TensorFlowVariables( + self.cost) self.summaries = tf.summary.merge_all() def _stride_arr(self, stride): @@ -75,27 +79,24 @@ class ResNet(object): filters = [16, 16, 32, 64] with tf.variable_scope('unit_1_0'): - x = res_func(x, filters[0], filters[1], - self._stride_arr(strides[0]), - activate_before_residual[0]) + x = res_func(x, filters[0], filters[1], self._stride_arr( + strides[0]), activate_before_residual[0]) for i in range(1, self.hps.num_residual_units): with tf.variable_scope('unit_1_%d' % i): x = res_func(x, filters[1], filters[1], self._stride_arr(1), False) with tf.variable_scope('unit_2_0'): - x = res_func(x, filters[1], filters[2], - self._stride_arr(strides[1]), - activate_before_residual[1]) + x = res_func(x, filters[1], filters[2], self._stride_arr( + strides[1]), activate_before_residual[1]) for i in range(1, self.hps.num_residual_units): with tf.variable_scope('unit_2_%d' % i): - x = res_func(x, filters[2], filters[2], - self._stride_arr(1), False) + x = res_func(x, filters[2], filters[2], self._stride_arr(1), + False) with tf.variable_scope('unit_3_0'): - x = res_func(x, filters[2], filters[3], - self._stride_arr(strides[2]), - activate_before_residual[2]) + x = res_func(x, filters[2], filters[3], self._stride_arr( + strides[2]), activate_before_residual[2]) for i in range(1, self.hps.num_residual_units): with tf.variable_scope('unit_3_%d' % i): x = res_func(x, filters[3], filters[3], self._stride_arr(1), @@ -136,7 +137,8 @@ class ResNet(object): apply_op = optimizer.minimize(self.cost, global_step=self.global_step) train_ops = [apply_op] + self._extra_train_ops self.train_op = tf.group(*train_ops) - self.variables = ray.experimental.TensorFlowVariables(self.train_op) + self.variables = ray.experimental.tf_utils.TensorFlowVariables( + self.train_op) def _batch_norm(self, name, x): """Batch normalization.""" @@ -144,49 +146,65 @@ class 
ResNet(object): params_shape = [x.get_shape()[-1]] beta = tf.get_variable( - 'beta', params_shape, tf.float32, + 'beta', + params_shape, + tf.float32, initializer=tf.constant_initializer(0.0, tf.float32)) gamma = tf.get_variable( - 'gamma', params_shape, tf.float32, + 'gamma', + params_shape, + tf.float32, initializer=tf.constant_initializer(1.0, tf.float32)) if self.mode == 'train': mean, variance = tf.nn.moments(x, [0, 1, 2], name='moments') moving_mean = tf.get_variable( - 'moving_mean', params_shape, tf.float32, + 'moving_mean', + params_shape, + tf.float32, initializer=tf.constant_initializer(0.0, tf.float32), trainable=False) moving_variance = tf.get_variable( - 'moving_variance', params_shape, tf.float32, + 'moving_variance', + params_shape, + tf.float32, initializer=tf.constant_initializer(1.0, tf.float32), trainable=False) self._extra_train_ops.append( - moving_averages.assign_moving_average(moving_mean, mean, - 0.9)) + moving_averages.assign_moving_average( + moving_mean, mean, 0.9)) self._extra_train_ops.append( - moving_averages.assign_moving_average(moving_variance, - variance, 0.9)) + moving_averages.assign_moving_average( + moving_variance, variance, 0.9)) else: mean = tf.get_variable( - 'moving_mean', params_shape, tf.float32, + 'moving_mean', + params_shape, + tf.float32, initializer=tf.constant_initializer(0.0, tf.float32), trainable=False) variance = tf.get_variable( - 'moving_variance', params_shape, tf.float32, + 'moving_variance', + params_shape, + tf.float32, initializer=tf.constant_initializer(1.0, tf.float32), trainable=False) tf.summary.histogram(mean.op.name, mean) tf.summary.histogram(variance.op.name, variance) # elipson used to be 1e-5. Maybe 0.001 solves NaN problem in deeper # net. 
- y = tf.nn.batch_normalization( - x, mean, variance, beta, gamma, 0.001) + y = tf.nn.batch_normalization(x, mean, variance, beta, gamma, + 0.001) y.set_shape(x.get_shape()) return y - def _residual(self, x, in_filter, out_filter, stride, + def _residual(self, + x, + in_filter, + out_filter, + stride, activate_before_residual=False): """Residual unit with 2 sub layers.""" if activate_before_residual: @@ -212,14 +230,18 @@ class ResNet(object): if in_filter != out_filter: orig_x = tf.nn.avg_pool(orig_x, stride, stride, 'VALID') orig_x = tf.pad( - orig_x, [[0, 0], [0, 0], [0, 0], - [(out_filter - in_filter) // 2, - (out_filter - in_filter) // 2]]) + orig_x, + [[0, 0], [0, 0], [0, 0], [(out_filter - in_filter) // 2, + (out_filter - in_filter) // 2]]) x += orig_x return x - def _bottleneck_residual(self, x, in_filter, out_filter, stride, + def _bottleneck_residual(self, + x, + in_filter, + out_filter, + stride, activate_before_residual=False): """Bottleneck residual unit with 3 sub layers.""" if activate_before_residual: @@ -271,7 +293,8 @@ class ResNet(object): n = filter_size * filter_size * out_filters kernel = tf.get_variable( 'DW', [filter_size, filter_size, in_filters, out_filters], - tf.float32, initializer=tf.random_normal_initializer( + tf.float32, + initializer=tf.random_normal_initializer( stddev=np.sqrt(2.0 / n))) return tf.nn.conv2d(x, kernel, strides, padding='SAME') @@ -285,8 +308,8 @@ class ResNet(object): w = tf.get_variable( 'DW', [x.get_shape()[1], out_dim], initializer=tf.uniform_unit_scaling_initializer(factor=1.0)) - b = tf.get_variable('biases', [out_dim], - initializer=tf.constant_initializer()) + b = tf.get_variable( + 'biases', [out_dim], initializer=tf.constant_initializer()) return tf.nn.xw_plus_b(x, w, b) def _global_avg_pool(self, x): diff --git a/python/ray/experimental/__init__.py b/python/ray/experimental/__init__.py index 31aea1758..425ff2d93 100644 --- a/python/ray/experimental/__init__.py +++ b/python/ray/experimental/__init__.py @@ 
-2,7 +2,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from .tfutils import TensorFlowVariables from .features import ( flush_redis_unsafe, flush_task_and_object_metadata_unsafe, flush_finished_tasks_unsafe, flush_evicted_objects_unsafe, @@ -12,6 +11,13 @@ from .gcs_flush_policy import (set_flushing_policy, GcsFlushPolicy, from .named_actors import get_actor, register_actor from .api import get, wait + +def TensorFlowVariables(*args, **kwargs): + raise DeprecationWarning( + "'ray.experimental.TensorFlowVariables' is deprecated. Instead, please" + " do 'from ray.experimental.tf_utils import TensorFlowVariables'.") + + __all__ = [ "TensorFlowVariables", "flush_redis_unsafe", "flush_task_and_object_metadata_unsafe", "flush_finished_tasks_unsafe", diff --git a/python/ray/experimental/sgd/mnist_example.py b/python/ray/experimental/sgd/mnist_example.py index 7c0ff5bd2..836126a9c 100755 --- a/python/ray/experimental/sgd/mnist_example.py +++ b/python/ray/experimental/sgd/mnist_example.py @@ -24,7 +24,7 @@ from ray.tune import run_experiments from ray.tune.examples.tune_mnist_ray import deepnn from ray.experimental.sgd.model import Model from ray.experimental.sgd.sgd import DistributedSGD -from ray.experimental.tfutils import TensorFlowVariables +import ray.experimental.tf_utils parser = argparse.ArgumentParser() parser.add_argument("--redis-address", default=None, type=str) @@ -67,8 +67,8 @@ class MNISTModel(Model): tf.nn.softmax_cross_entropy_with_logits( labels=self.y_, logits=y_conv)) self.optimizer = tf.train.AdamOptimizer(1e-4) - self.variables = TensorFlowVariables(self.loss, - tf.get_default_session()) + self.variables = ray.experimental.tf_utils.TensorFlowVariables( + self.loss, tf.get_default_session()) # For evaluating test accuracy correct_prediction = tf.equal( diff --git a/python/ray/experimental/sgd/tfbench/test_model.py b/python/ray/experimental/sgd/tfbench/test_model.py index 
b0c2fde99..0fe081607 100644 --- a/python/ray/experimental/sgd/tfbench/test_model.py +++ b/python/ray/experimental/sgd/tfbench/test_model.py @@ -6,7 +6,7 @@ import tensorflow as tf from tfbench import model_config from ray.experimental.sgd.model import Model -from ray.experimental.tfutils import TensorFlowVariables +import ray.experimental.tf_utils class MockDataset(): @@ -47,8 +47,8 @@ class TFBenchModel(Model): self.loss = tf.reduce_mean(loss, name='xentropy-loss') self.optimizer = tf.train.GradientDescentOptimizer(1e-6) - self.variables = TensorFlowVariables(self.loss, - tf.get_default_session()) + self.variables = ray.experimental.tf_utils.TensorFlowVariables( + self.loss, tf.get_default_session()) def get_loss(self): return self.loss diff --git a/python/ray/experimental/tfutils.py b/python/ray/experimental/tf_utils.py similarity index 99% rename from python/ray/experimental/tfutils.py rename to python/ray/experimental/tf_utils.py index e33b33cad..bb424134e 100644 --- a/python/ray/experimental/tfutils.py +++ b/python/ray/experimental/tf_utils.py @@ -1,8 +1,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np + from collections import deque, OrderedDict +import numpy as np + +import tensorflow as tf def unflatten(vector, shapes): @@ -45,7 +48,6 @@ class TensorFlowVariables(object): input_variables (List[tf.Variables]): Variables to include in the list. 
""" - import tensorflow as tf self.sess = sess if not isinstance(output, (list, tuple)): output = [output] diff --git a/python/ray/rllib/agents/ars/policies.py b/python/ray/rllib/agents/ars/policies.py index 27f664655..7c4defd69 100644 --- a/python/ray/rllib/agents/ars/policies.py +++ b/python/ray/rllib/agents/ars/policies.py @@ -10,6 +10,7 @@ import numpy as np import tensorflow as tf import ray +import ray.experimental.tf_utils from ray.rllib.evaluation.sampler import _unbatch_tuple_actions from ray.rllib.utils.filter import get_filter from ray.rllib.models import ModelCatalog @@ -81,7 +82,7 @@ class GenericPolicy(object): dist = dist_class(model.outputs) self.sampler = dist.sample() - self.variables = ray.experimental.TensorFlowVariables( + self.variables = ray.experimental.tf_utils.TensorFlowVariables( model.outputs, self.sess) self.num_params = sum( diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py index 028a2e266..5f3e7dd5d 100644 --- a/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py +++ b/python/ray/rllib/agents/ddpg/ddpg_policy_graph.py @@ -8,8 +8,9 @@ import tensorflow as tf import tensorflow.contrib.layers as layers import ray -from ray.rllib.agents.dqn.dqn_policy_graph import _huber_loss, \ - _minimize_and_clip, _scope_vars, _postprocess_dqn +import ray.experimental.tf_utils +from ray.rllib.agents.dqn.dqn_policy_graph import ( + _huber_loss, _minimize_and_clip, _scope_vars, _postprocess_dqn) from ray.rllib.models import ModelCatalog from ray.rllib.utils.annotations import override from ray.rllib.utils.error import UnsupportedSpaceException @@ -387,7 +388,7 @@ class DDPGPolicyGraph(TFPolicyGraph): # Note that this encompasses both the policy and Q-value networks and # their corresponding target networks - self.variables = ray.experimental.TensorFlowVariables( + self.variables = ray.experimental.tf_utils.TensorFlowVariables( tf.group(q_tp0, q_tp1), self.sess) # Hard initial update diff 
--git a/python/ray/rllib/agents/es/policies.py b/python/ray/rllib/agents/es/policies.py index cf2da630e..61f748ce0 100644 --- a/python/ray/rllib/agents/es/policies.py +++ b/python/ray/rllib/agents/es/policies.py @@ -10,6 +10,7 @@ import numpy as np import tensorflow as tf import ray +import ray.experimental.tf_utils from ray.rllib.evaluation.sampler import _unbatch_tuple_actions from ray.rllib.models import ModelCatalog from ray.rllib.utils.filter import get_filter @@ -59,7 +60,7 @@ class GenericPolicy(object): dist = dist_class(model.outputs) self.sampler = dist.sample() - self.variables = ray.experimental.TensorFlowVariables( + self.variables = ray.experimental.tf_utils.TensorFlowVariables( model.outputs, self.sess) self.num_params = sum( diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/python/ray/rllib/evaluation/tf_policy_graph.py index 3271b49e6..232b869ec 100644 --- a/python/ray/rllib/evaluation/tf_policy_graph.py +++ b/python/ray/rllib/evaluation/tf_policy_graph.py @@ -9,6 +9,7 @@ import tensorflow as tf import numpy as np import ray +import ray.experimental.tf_utils from ray.rllib.evaluation.policy_graph import PolicyGraph from ray.rllib.models.lstm import chop_into_sequences from ray.rllib.utils.annotations import override, DeveloperAPI @@ -120,7 +121,7 @@ class TFPolicyGraph(PolicyGraph): for (g, v) in self.gradients(self._optimizer) if g is not None] self._grads = [g for (g, v) in self._grads_and_vars] - self._variables = ray.experimental.TensorFlowVariables( + self._variables = ray.experimental.tf_utils.TensorFlowVariables( self._loss, self._sess) # gather update ops for any batch norm layers diff --git a/python/ray/tests/test_tensorflow.py b/python/ray/tests/test_tensorflow.py index f37d81b79..258ca0124 100644 --- a/python/ray/tests/test_tensorflow.py +++ b/python/ray/tests/test_tensorflow.py @@ -7,6 +7,7 @@ import pytest import tensorflow as tf import ray +import ray.experimental.tf_utils def make_linear_network(w_name=None, b_name=None): 
@@ -31,7 +32,7 @@ class LossActor(object): loss, init, _, _ = make_linear_network() sess = tf.Session() # Additional code for setting and getting the weights. - weights = ray.experimental.TensorFlowVariables( + weights = ray.experimental.tf_utils.TensorFlowVariables( loss if use_loss else None, sess, input_variables=var) # Return all of the data needed to use the network. self.values = [weights, init, sess] @@ -53,7 +54,8 @@ class NetActor(object): loss, init, _, _ = make_linear_network() sess = tf.Session() # Additional code for setting and getting the weights. - variables = ray.experimental.TensorFlowVariables(loss, sess) + variables = ray.experimental.tf_utils.TensorFlowVariables( + loss, sess) # Return all of the data needed to use the network. self.values = [variables, init, sess] sess.run(init) @@ -73,7 +75,8 @@ class TrainActor(object): with tf.Graph().as_default(): loss, init, x_data, y_data = make_linear_network() sess = tf.Session() - variables = ray.experimental.TensorFlowVariables(loss, sess) + variables = ray.experimental.tf_utils.TensorFlowVariables( + loss, sess) optimizer = tf.train.GradientDescentOptimizer(0.9) grads = optimizer.compute_gradients(loss) train = optimizer.apply_gradients(grads) @@ -107,7 +110,7 @@ def test_tensorflow_variables(ray_start_regular): loss, init, _, _ = make_linear_network() sess.run(init) - variables = ray.experimental.TensorFlowVariables(loss, sess) + variables = ray.experimental.tf_utils.TensorFlowVariables(loss, sess) weights = variables.get_weights() for (name, val) in weights.items(): @@ -119,7 +122,7 @@ def test_tensorflow_variables(ray_start_regular): loss2, init2, _, _ = make_linear_network("w", "b") sess.run(init2) - variables2 = ray.experimental.TensorFlowVariables(loss2, sess) + variables2 = ray.experimental.tf_utils.TensorFlowVariables(loss2, sess) weights2 = variables2.get_weights() for (name, val) in weights2.items(): @@ -131,7 +134,7 @@ def test_tensorflow_variables(ray_start_regular): 
variables2.set_flat(flat_weights) assert_almost_equal(flat_weights, variables2.get_flat()) - variables3 = ray.experimental.TensorFlowVariables([loss2]) + variables3 = ray.experimental.tf_utils.TensorFlowVariables([loss2]) assert variables3.sess is None sess = tf.Session() variables3.set_session(sess) @@ -205,7 +208,7 @@ def test_network_driver_worker_independent(ray_start_regular): # Create a network on the driver locally. sess1 = tf.Session() loss1, init1, _, _ = make_linear_network() - ray.experimental.TensorFlowVariables(loss1, sess1) + ray.experimental.tf_utils.TensorFlowVariables(loss1, sess1) sess1.run(init1) net2 = ray.remote(NetActor).remote() @@ -221,7 +224,7 @@ def test_variables_control_dependencies(ray_start_regular): sess = tf.Session() loss, init, _, _ = make_linear_network() minimizer = tf.train.MomentumOptimizer(0.9, 0.9).minimize(loss) - net_vars = ray.experimental.TensorFlowVariables(minimizer, sess) + net_vars = ray.experimental.tf_utils.TensorFlowVariables(minimizer, sess) sess.run(init) # Tests if all variables are properly retrieved, 2 variables and 2