From 1650f7b17400e60d6748bd8a2e1e4a18a0308667 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Sun, 13 Oct 2019 20:33:56 -0700 Subject: [PATCH] =?UTF-8?q?[tune]=20Remove=20TF=20MNIST=20example=20+=20ad?= =?UTF-8?q?d=20TrialRunner=20hook=20to=20execut=E2=80=A6=20(#5868)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * remove test * add trial runner * remvoerestore * Remove other mnist examples * tunetest * revert * v1 * Revert "v1" This reverts commit c8bddaf2db7a8270c43c02021cac0e75df15ed20. * Revert "revert" This reverts commit b58f56884a0c288d3a6f997d149ab4d496ddd7a3. * errors * format --- ci/jenkins_tests/run_tune_tests.sh | 14 +- doc/source/tune-examples.rst | 14 +- python/ray/tune/examples/README.rst | 14 +- python/ray/tune/examples/pbt_memnn_example.py | 18 +- .../examples/tune_mnist_async_hyperband.py | 247 ------------------ python/ray/tune/examples/tune_mnist_ray.py | 241 ----------------- .../tune/examples/tune_mnist_ray_hyperband.py | 241 ----------------- python/ray/tune/ray_trial_executor.py | 2 +- python/ray/tune/result.py | 6 +- python/ray/tune/tests/test_trial_runner.py | 4 +- python/ray/tune/tests/test_tune_restore.py | 5 - python/ray/tune/trial_executor.py | 4 +- python/ray/tune/trial_runner.py | 4 +- 13 files changed, 26 insertions(+), 788 deletions(-) delete mode 100755 python/ray/tune/examples/tune_mnist_async_hyperband.py delete mode 100755 python/ray/tune/examples/tune_mnist_ray.py delete mode 100755 python/ray/tune/examples/tune_mnist_ray_hyperband.py diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh index e03ef1d49..301bebe9c 100755 --- a/ci/jenkins_tests/run_tune_tests.sh +++ b/ci/jenkins_tests/run_tune_tests.sh @@ -46,10 +46,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/tune/tests/tutorial.py -$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - python /ray/python/ray/tune/examples/tune_mnist_ray.py \ - --smoke-test - $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/tune/examples/pbt_example.py \ --smoke-test @@ -68,14 +64,6 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ bash -c 'pip install tensorflow==1.15.0rc1 && python /ray/python/ray/tune/examples/async_hyperband_example.py --smoke-test' -$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \ - --smoke-test - -$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - python /ray/python/ray/tune/examples/tune_mnist_async_hyperband.py \ - --smoke-test - $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/tune/examples/lightgbm_example.py @@ -126,7 +114,7 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --smoke-test $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - python /ray/python/ray/tune/examples/memnn_example.py \ + python /ray/python/ray/tune/examples/pbt_memnn_example.py \ --smoke-test # uncomment once statsmodels is updated. diff --git a/doc/source/tune-examples.rst b/doc/source/tune-examples.rst index 5fcd7035a..c4971c3e6 100644 --- a/doc/source/tune-examples.rst +++ b/doc/source/tune-examples.rst @@ -16,7 +16,6 @@ General Examples - `pbt_example `__: Example of using a Trainable class with PopulationBasedTraining scheduler. - `pbt_ppo_example `__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler. - `logging_example `__: Example of custom loggers and custom trial directory naming. -- `pbt_memnn_example `__: Example of training a Memory NN on bAbI with Keras using PBT. Search Algorithm Examples ------------------------- @@ -26,10 +25,11 @@ Search Algorithm Examples - `Nevergrad example `__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad `_ with 4 parallel workers. - `Bayesian Optimization example `__: Optimize a simple toy function using `Bayesian Optimization `_ with 4 parallel workers. -Keras Examples --------------- +Tensorflow/Keras Examples +------------------------- - `tune_mnist_keras `__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune. +- `pbt_memnn_example `__: Example of training a Memory NN on bAbI with Keras using PBT. PyTorch Examples @@ -39,14 +39,6 @@ PyTorch Examples - `mnist_pytorch_trainable `__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end. -TensorFlow Examples -------------------- - -- `tune_mnist_ray `__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class. -- `tune_mnist_ray_hyperband `__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler. -- `tune_mnist_async_hyperband `__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand. - - XGBoost Example --------------- diff --git a/python/ray/tune/examples/README.rst b/python/ray/tune/examples/README.rst index 68b214cdb..f72442307 100644 --- a/python/ray/tune/examples/README.rst +++ b/python/ray/tune/examples/README.rst @@ -16,7 +16,6 @@ General Examples - `pbt_example `__: Example of using a Trainable class with PopulationBasedTraining scheduler. - `pbt_ppo_example `__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler. - `logging_example `__: Example of custom loggers and custom trial directory naming. -- `pbt_memnn_example `__: Example of training a Memory NN on bAbI with Keras using PBT. Search Algorithm Examples ------------------------- @@ -26,10 +25,11 @@ Search Algorithm Examples - `Nevergrad example `__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad `_ with 4 parallel workers. - `Bayesian Optimization example `__: Optimize a simple toy function using `Bayesian Optimization `_ with 4 parallel workers. -Keras Examples --------------- +Tensorflow/Keras Examples +------------------------- - `tune_mnist_keras `__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune. +- `pbt_memnn_example `__: Example of training a Memory NN on bAbI with Keras using PBT. PyTorch Examples @@ -39,14 +39,6 @@ PyTorch Examples - `mnist_pytorch_trainable `__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end. -TensorFlow Examples -------------------- - -- `tune_mnist_ray `__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class. -- `tune_mnist_ray_hyperband `__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler. -- `tune_mnist_async_hyperband `__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand. - - XGBoost Example --------------- diff --git a/python/ray/tune/examples/pbt_memnn_example.py b/python/ray/tune/examples/pbt_memnn_example.py index c9ab4cc1c..f19b22b65 100644 --- a/python/ray/tune/examples/pbt_memnn_example.py +++ b/python/ray/tune/examples/pbt_memnn_example.py @@ -5,15 +5,15 @@ References Keras and is based off of https://keras.io/examples/babi_memnn/. from __future__ import print_function -from tensorflow.python.keras.models import Sequential, Model, load_model -from tensorflow.python.keras.layers.embeddings import Embedding -from tensorflow.python.keras.layers import (Input, Activation, Dense, Permute, - Dropout) -from tensorflow.python.keras.layers import add, dot, concatenate -from tensorflow.python.keras.layers import LSTM -from tensorflow.python.keras.optimizers import RMSprop -from tensorflow.python.keras.utils.data_utils import get_file -from tensorflow.python.keras.preprocessing.sequence import pad_sequences +from tensorflow.keras.models import Sequential, Model, load_model +from tensorflow.keras.layers import Embedding +from tensorflow.keras.layers import (Input, Activation, Dense, Permute, + Dropout) +from tensorflow.keras.layers import add, dot, concatenate +from tensorflow.keras.layers import LSTM +from tensorflow.keras.optimizers import RMSprop +from tensorflow.keras.utils import get_file +from tensorflow.keras.preprocessing.sequence import pad_sequences from ray.tune import Trainable import argparse import tarfile diff --git a/python/ray/tune/examples/tune_mnist_async_hyperband.py b/python/ray/tune/examples/tune_mnist_async_hyperband.py deleted file mode 100755 index b58ac0353..000000000 --- a/python/ray/tune/examples/tune_mnist_async_hyperband.py +++ /dev/null @@ -1,247 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A deep MNIST classifier using convolutional layers. - -See extensive documentation at -https://www.tensorflow.org/get_started/mnist/pros -""" -# Disable linter warnings to maintain consistency with tutorial. -# pylint: disable=invalid-name -# pylint: disable=g-bad-import-order - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import tempfile -import time - -import ray -from ray.tune import grid_search, run - -from tensorflow.examples.tutorials.mnist import input_data - -import tensorflow as tf - -FLAGS = None -status_reporter = None # used to report training status back to Ray -activation_fn = None # e.g. tf.nn.relu - - -def deepnn(x): - """deepnn builds the graph for a deep net for classifying digits. - - Args: - x: an input tensor with the dimensions (N_examples, 784), where 784 is - the number of pixels in a standard MNIST image. - - Returns: - A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with - values equal to the logits of classifying the digit into one of 10 - classes (the digits 0-9). keep_prob is a scalar placeholder for the - probability of dropout. - """ - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images - # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope("reshape"): - x_image = tf.reshape(x, [-1, 28, 28, 1]) - - # First convolutional layer - maps one grayscale image to 32 feature maps. - with tf.name_scope("conv1"): - W_conv1 = weight_variable([5, 5, 1, 32]) - b_conv1 = bias_variable([32]) - h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1) - - # Pooling layer - downsamples by 2X. - with tf.name_scope("pool1"): - h_pool1 = max_pool_2x2(h_conv1) - - # Second convolutional layer -- maps 32 feature maps to 64. - with tf.name_scope("conv2"): - W_conv2 = weight_variable([5, 5, 32, 64]) - b_conv2 = bias_variable([64]) - h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2) - - # Second pooling layer. - with tf.name_scope("pool2"): - h_pool2 = max_pool_2x2(h_conv2) - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - with tf.name_scope("fc1"): - W_fc1 = weight_variable([7 * 7 * 64, 1024]) - b_fc1 = bias_variable([1024]) - - h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) - h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of - # features. - with tf.name_scope("dropout"): - keep_prob = tf.placeholder(tf.float32) - h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope("fc2"): - W_fc2 = weight_variable([1024, 10]) - b_fc2 = bias_variable([10]) - - y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 - return y_conv, keep_prob - - -def conv2d(x, W): - """conv2d returns a 2d convolution layer with full stride.""" - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME") - - -def max_pool_2x2(x): - """max_pool_2x2 downsamples a feature map by 2X.""" - return tf.nn.max_pool( - x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - - -def main(_): - # Import data - for _ in range(10): - try: - mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) - break - except Exception: - time.sleep(5) - - # Create the model - x = tf.placeholder(tf.float32, [None, 784]) - - # Define loss and optimizer - y_ = tf.placeholder(tf.float32, [None, 10]) - - # Build the graph for the deep net - y_conv, keep_prob = deepnn(x) - - with tf.name_scope("loss"): - cross_entropy = tf.nn.softmax_cross_entropy_with_logits( - labels=y_, logits=y_conv) - cross_entropy = tf.reduce_mean(cross_entropy) - - with tf.name_scope("adam_optimizer"): - train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) - - with tf.name_scope("accuracy"): - correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) - correct_prediction = tf.cast(correct_prediction, tf.float32) - accuracy = tf.reduce_mean(correct_prediction) - - graph_location = tempfile.mkdtemp() - print("Saving graph to: %s" % graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(20000): - batch = mnist.train.next_batch(50) - if i % 10 == 0: - train_accuracy = accuracy.eval(feed_dict={ - x: batch[0], - y_: batch[1], - keep_prob: 1.0 - }) - - # !!! Report status to ray.tune !!! - if status_reporter: - status_reporter( - timesteps_total=i, mean_accuracy=train_accuracy) - - print("step %d, training accuracy %g" % (i, train_accuracy)) - train_step.run(feed_dict={ - x: batch[0], - y_: batch[1], - keep_prob: 0.5 - }) - - print("test accuracy %g" % accuracy.eval(feed_dict={ - x: mnist.test.images, - y_: mnist.test.labels, - keep_prob: 1.0 - })) - - -# !!! Entrypoint for ray.tune !!! -def train(config={"activation": "relu"}, reporter=None): - global FLAGS, status_reporter, activation_fn - status_reporter = reporter - activation_fn = getattr(tf.nn, config["activation"]) - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_dir", - type=str, - default="/tmp/tensorflow/mnist/input_data", - help="Directory for storing input data") - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) - - -# !!! Example of using the ray.tune Python API !!! -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--smoke-test", action="store_true", help="Finish quickly for testing") - args, _ = parser.parse_known_args() - - mnist_spec = { - "num_samples": 10, - "stop": { - "mean_accuracy": 0.99, - "timesteps_total": 600, - }, - "config": { - "activation": grid_search(["relu", "elu", "tanh"]), - }, - } - - if args.smoke_test: - mnist_spec["stop"]["training_iteration"] = 2 - mnist_spec["num_samples"] = 1 - - ray.init() - - from ray.tune.schedulers import AsyncHyperBandScheduler - run(train, - name="tune_mnist_test", - scheduler=AsyncHyperBandScheduler( - time_attr="timesteps_total", - metric="mean_accuracy", - mode="max", - max_t=600, - ), - **mnist_spec) diff --git a/python/ray/tune/examples/tune_mnist_ray.py b/python/ray/tune/examples/tune_mnist_ray.py deleted file mode 100755 index 3807cc0d2..000000000 --- a/python/ray/tune/examples/tune_mnist_ray.py +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A deep MNIST classifier using convolutional layers. - -See extensive documentation at -https://www.tensorflow.org/get_started/mnist/pros -""" -# Disable linter warnings to maintain consistency with tutorial. -# pylint: disable=invalid-name -# pylint: disable=g-bad-import-order - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import tempfile -import time - -import ray -from ray import tune -from ray.tune import grid_search, register_trainable - -from tensorflow.examples.tutorials.mnist import input_data -import numpy as np - -import tensorflow as tf - -FLAGS = None -status_reporter = None # used to report training status back to Ray -activation_fn = tf.nn.relu # e.g. tf.nn.relu - - -def deepnn(x): - """deepnn builds the graph for a deep net for classifying digits. - - Args: - x: an input tensor with the dimensions (N_examples, 784), where 784 is - the number of pixels in a standard MNIST image. - - Returns: - A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with - values equal to the logits of classifying the digit into one of 10 - classes (the digits 0-9). keep_prob is a scalar placeholder for the - probability of dropout. - """ - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images - # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope("reshape"): - x_image = tf.reshape(x, [-1, 28, 28, 1]) - - # First convolutional layer - maps one grayscale image to 32 feature maps. - with tf.name_scope("conv1"): - W_conv1 = weight_variable([5, 5, 1, 32]) - b_conv1 = bias_variable([32]) - h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1) - - # Pooling layer - downsamples by 2X. - with tf.name_scope("pool1"): - h_pool1 = max_pool_2x2(h_conv1) - - # Second convolutional layer -- maps 32 feature maps to 64. - with tf.name_scope("conv2"): - W_conv2 = weight_variable([5, 5, 32, 64]) - b_conv2 = bias_variable([64]) - h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2) - - # Second pooling layer. - with tf.name_scope("pool2"): - h_pool2 = max_pool_2x2(h_conv2) - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - with tf.name_scope("fc1"): - W_fc1 = weight_variable([7 * 7 * 64, 1024]) - b_fc1 = bias_variable([1024]) - - h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) - h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of - # features. - with tf.name_scope("dropout"): - keep_prob = tf.placeholder(tf.float32) - h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope("fc2"): - W_fc2 = weight_variable([1024, 10]) - b_fc2 = bias_variable([10]) - - y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 - return y_conv, keep_prob - - -def conv2d(x, W): - """conv2d returns a 2d convolution layer with full stride.""" - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME") - - -def max_pool_2x2(x): - """max_pool_2x2 downsamples a feature map by 2X.""" - return tf.nn.max_pool( - x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - - -def main(_): - # Import data - for _ in range(10): - try: - mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) - break - except Exception: - time.sleep(5) - - # Create the model - x = tf.placeholder(tf.float32, [None, 784]) - - # Define loss and optimizer - y_ = tf.placeholder(tf.float32, [None, 10]) - - # Build the graph for the deep net - y_conv, keep_prob = deepnn(x) - - with tf.name_scope("loss"): - cross_entropy = tf.nn.softmax_cross_entropy_with_logits( - labels=y_, logits=y_conv) - cross_entropy = tf.reduce_mean(cross_entropy) - - with tf.name_scope("adam_optimizer"): - train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) - - with tf.name_scope("accuracy"): - correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) - correct_prediction = tf.cast(correct_prediction, tf.float32) - accuracy = tf.reduce_mean(correct_prediction) - - graph_location = tempfile.mkdtemp() - print("Saving graph to: %s" % graph_location) - train_writer = tf.summary.FileWriter(graph_location) - train_writer.add_graph(tf.get_default_graph()) - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - for i in range(20000): - batch = mnist.train.next_batch(50) - if i % 10 == 0: - train_accuracy = accuracy.eval(feed_dict={ - x: batch[0], - y_: batch[1], - keep_prob: 1.0 - }) - - # !!! Report status to ray.tune !!! - if status_reporter: - status_reporter( - timesteps_total=i, mean_accuracy=train_accuracy) - - print("step %d, training accuracy %g" % (i, train_accuracy)) - train_step.run(feed_dict={ - x: batch[0], - y_: batch[1], - keep_prob: 0.5 - }) - - print("test accuracy %g" % accuracy.eval(feed_dict={ - x: mnist.test.images, - y_: mnist.test.labels, - keep_prob: 1.0 - })) - - -# !!! Entrypoint for ray.tune !!! -def train(config={"activation": "relu"}, reporter=None): - global FLAGS, status_reporter, activation_fn - status_reporter = reporter - activation_fn = getattr(tf.nn, config["activation"]) - parser = argparse.ArgumentParser() - parser.add_argument( - "--data_dir", - type=str, - default="/tmp/tensorflow/mnist/input_data", - help="Directory for storing input data") - FLAGS, unparsed = parser.parse_known_args() - tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) - - -# !!! Example of using the ray.tune Python API !!! -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--smoke-test", action="store_true", help="Finish quickly for testing") - args, _ = parser.parse_known_args() - - register_trainable("train_mnist", train) - mnist_spec = { - "stop": { - "mean_accuracy": 0.99, - "time_total_s": 600, - }, - "config": { - "activation": grid_search(["relu", "elu", "tanh"]), - # You can pass any serializable object as well - "foo": grid_search([np.array([1, 2]), - np.array([2, 3])]), - }, - } - - if args.smoke_test: - mnist_spec["stop"]["training_iteration"] = 2 - - ray.init() - tune.run("train_mnist", name="tune_mnist_test", **mnist_spec) diff --git a/python/ray/tune/examples/tune_mnist_ray_hyperband.py b/python/ray/tune/examples/tune_mnist_ray_hyperband.py deleted file mode 100755 index 5e5b6cd70..000000000 --- a/python/ray/tune/examples/tune_mnist_ray_hyperband.py +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/env python -# -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A deep MNIST classifier using convolutional layers. -See extensive documentation at -https://www.tensorflow.org/get_started/mnist/pros -""" -# Disable linter warnings to maintain consistency with tutorial. -# pylint: disable=invalid-name -# pylint: disable=g-bad-import-order - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import os -import time - -import ray -from ray import tune -from ray.tune import Trainable, sample_from -from ray.tune.schedulers import HyperBandScheduler -from tensorflow.examples.tutorials.mnist import input_data - -import tensorflow as tf -import numpy as np - -activation_fn = None # e.g. tf.nn.relu - - -def setupCNN(x): - """setupCNN builds the graph for a deep net for classifying digits. - Args: - x: an input tensor with the dimensions (N_examples, 784), where 784 is - the number of pixels in a standard MNIST image. - Returns: - A tuple (y, keep_prob). y is a tensor of shape (N_examples, 10), with - values equal to the logits of classifying the digit into one of 10 - classes (the digits 0-9). keep_prob is a scalar placeholder for the - probability of dropout. - """ - # Reshape to use within a convolutional neural net. - # Last dimension is for "features" - there is only one here, since images - # are grayscale -- it would be 3 for an RGB image, 4 for RGBA, etc. - with tf.name_scope("reshape"): - x_image = tf.reshape(x, [-1, 28, 28, 1]) - - # First convolutional layer - maps one grayscale image to 32 feature maps. - with tf.name_scope("conv1"): - W_conv1 = weight_variable([5, 5, 1, 32]) - b_conv1 = bias_variable([32]) - h_conv1 = activation_fn(conv2d(x_image, W_conv1) + b_conv1) - - # Pooling layer - downsamples by 2X. - with tf.name_scope("pool1"): - h_pool1 = max_pool_2x2(h_conv1) - - # Second convolutional layer -- maps 32 feature maps to 64. - with tf.name_scope("conv2"): - W_conv2 = weight_variable([5, 5, 32, 64]) - b_conv2 = bias_variable([64]) - h_conv2 = activation_fn(conv2d(h_pool1, W_conv2) + b_conv2) - - # Second pooling layer. - with tf.name_scope("pool2"): - h_pool2 = max_pool_2x2(h_conv2) - - # Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image - # is down to 7x7x64 feature maps -- maps this to 1024 features. - with tf.name_scope("fc1"): - W_fc1 = weight_variable([7 * 7 * 64, 1024]) - b_fc1 = bias_variable([1024]) - - h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64]) - h_fc1 = activation_fn(tf.matmul(h_pool2_flat, W_fc1) + b_fc1) - - # Dropout - controls the complexity of the model, prevents co-adaptation of - # features. - with tf.name_scope("dropout"): - keep_prob = tf.placeholder(tf.float32) - h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob) - - # Map the 1024 features to 10 classes, one for each digit - with tf.name_scope("fc2"): - W_fc2 = weight_variable([1024, 10]) - b_fc2 = bias_variable([10]) - - y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2 - return y_conv, keep_prob - - -def conv2d(x, W): - """conv2d returns a 2d convolution layer with full stride.""" - return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME") - - -def max_pool_2x2(x): - """max_pool_2x2 downsamples a feature map by 2X.""" - return tf.nn.max_pool( - x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME") - - -def weight_variable(shape): - """weight_variable generates a weight variable of a given shape.""" - initial = tf.truncated_normal(shape, stddev=0.1) - return tf.Variable(initial) - - -def bias_variable(shape): - """bias_variable generates a bias variable of a given shape.""" - initial = tf.constant(0.1, shape=shape) - return tf.Variable(initial) - - -class TrainMNIST(Trainable): - """Example MNIST trainable.""" - - def _setup(self, config): - global activation_fn - - self.timestep = 0 - - # Import data - for _ in range(10): - try: - self.mnist = input_data.read_data_sets( - "/tmp/mnist_ray_demo", one_hot=True) - break - except Exception as e: - print("Error loading data, retrying", e) - time.sleep(5) - - assert self.mnist - - self.x = tf.placeholder(tf.float32, [None, 784]) - self.y_ = tf.placeholder(tf.float32, [None, 10]) - - activation_fn = getattr(tf.nn, config.get("activation", "relu")) - - # Build the graph for the deep net - y_conv, self.keep_prob = setupCNN(self.x) - - with tf.name_scope("loss"): - cross_entropy = tf.nn.softmax_cross_entropy_with_logits( - labels=self.y_, logits=y_conv) - cross_entropy = tf.reduce_mean(cross_entropy) - - with tf.name_scope("adam_optimizer"): - train_step = tf.train.AdamOptimizer( - config.get("learning_rate", 1e-4)).minimize(cross_entropy) - - self.train_step = train_step - - with tf.name_scope("accuracy"): - correct_prediction = tf.equal( - tf.argmax(y_conv, 1), tf.argmax(self.y_, 1)) - correct_prediction = tf.cast(correct_prediction, tf.float32) - self.accuracy = tf.reduce_mean(correct_prediction) - - self.sess = tf.Session() - self.sess.run(tf.global_variables_initializer()) - self.saver = tf.train.Saver(save_relative_paths=True) - - def _train(self): - for i in range(10): - batch = self.mnist.train.next_batch(50) - self.sess.run( - self.train_step, - feed_dict={ - self.x: batch[0], - self.y_: batch[1], - self.keep_prob: 0.5 - }) - - batch = self.mnist.train.next_batch(50) - train_accuracy = self.sess.run( - self.accuracy, - feed_dict={ - self.x: batch[0], - self.y_: batch[1], - self.keep_prob: 1.0 - }) - return {"mean_accuracy": train_accuracy} - - def _save(self, checkpoint_dir): - path = self.saver.save(self.sess, os.path.join(checkpoint_dir, "save")) - return path - - def _restore(self, checkpoint_path): - self.saver.restore(self.sess, checkpoint_path) - - -# !!! Example of using the ray.tune Python API !!! -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--smoke-test", action="store_true", help="Finish quickly for testing") - args, _ = parser.parse_known_args() - mnist_spec = { - "stop": { - "mean_accuracy": 0.99, - "time_total_s": 600, - }, - "config": { - "learning_rate": sample_from( - lambda spec: 10**np.random.uniform(-5, -3)), - "activation": "relu", - }, - "num_samples": 10, - } - - if args.smoke_test: - mnist_spec["stop"]["training_iteration"] = 20 - mnist_spec["num_samples"] = 1 - - ray.init() - hyperband = HyperBandScheduler( - time_attr="training_iteration", - metric="mean_accuracy", - mode="max", - max_t=10) - - tune.run( - TrainMNIST, - name="mnist_hyperband_test", - scheduler=hyperband, - **mnist_spec) diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py index b3ce9dd34..7dcbeed5d 100644 --- a/python/ray/tune/ray_trial_executor.py +++ b/python/ray/tune/ray_trial_executor.py @@ -523,7 +523,7 @@ class RayTrialExecutor(TrialExecutor): else: return "? CPUs, ? GPUs" - def on_step_begin(self): + def on_step_begin(self, trial_runner): """Before step() called, update the available resources.""" self._update_avail_resources() diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py index 81a7f36ef..9883eb735 100644 --- a/python/ray/tune/result.py +++ b/python/ray/tune/result.py @@ -39,20 +39,20 @@ MEAN_ACCURACY = "mean_accuracy" # Number of episodes in this iteration. EPISODES_THIS_ITER = "episodes_this_iter" -# (Optional/Auto-filled) Accumulated number of episodes for this experiment. +# (Optional/Auto-filled) Accumulated number of episodes for this trial. EPISODES_TOTAL = "episodes_total" # Number of timesteps in this iteration. TIMESTEPS_THIS_ITER = "timesteps_this_iter" -# (Auto-filled) Accumulated number of timesteps for this entire experiment. +# (Auto-filled) Accumulated number of timesteps for this entire trial. TIMESTEPS_TOTAL = "timesteps_total" # (Auto-filled) Time in seconds this iteration took to run. # This may be overriden to override the system-computed time difference. TIME_THIS_ITER_S = "time_this_iter_s" -# (Auto-filled) Accumulated time in seconds for this entire experiment. +# (Auto-filled) Accumulated time in seconds for this entire trial. TIME_TOTAL_S = "time_total_s" # (Auto-filled) The index of this training iteration. diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py index 64c5fbea1..984418048 100644 --- a/python/ray/tune/tests/test_trial_runner.py +++ b/python/ray/tune/tests/test_trial_runner.py @@ -2081,12 +2081,12 @@ class TrialRunnerTest(unittest.TestCase): ray.init(num_cpus=4, num_gpus=2) runner = TrialRunner() - def on_step_begin(self): + def on_step_begin(self, trialrunner): self._update_avail_resources() cnt = self.pre_step if hasattr(self, "pre_step") else 0 setattr(self, "pre_step", cnt + 1) - def on_step_end(self): + def on_step_end(self, trialrunner): cnt = self.pre_step if hasattr(self, "post_step") else 0 setattr(self, "post_step", 1 + cnt) diff --git a/python/ray/tune/tests/test_tune_restore.py b/python/ray/tune/tests/test_tune_restore.py index a9595f386..769c1bdae 100644 --- a/python/ray/tune/tests/test_tune_restore.py +++ b/python/ray/tune/tests/test_tune_restore.py @@ -71,11 +71,6 @@ class TuneExampleTest(unittest.TestCase): ray.shutdown() _register_all() - def testTensorFlowMNIST(self): - from ray.tune.examples.tune_mnist_ray_hyperband import TrainMNIST - validate_save_restore(TrainMNIST) - validate_save_restore(TrainMNIST, use_object_store=True) - def testPBTKeras(self): from ray.tune.examples.pbt_tune_cifar10_with_keras import Cifar10Model from tensorflow.python.keras.datasets import cifar10 diff --git a/python/ray/tune/trial_executor.py b/python/ray/tune/trial_executor.py index 1364ddebe..9e53e7108 100644 --- a/python/ray/tune/trial_executor.py +++ b/python/ray/tune/trial_executor.py @@ -142,11 +142,11 @@ class TrialExecutor(object): raise NotImplementedError("Subclasses of TrialExecutor must provide " "get_running_trials() method") - def on_step_begin(self): + def on_step_begin(self, trial_runner): """A hook called before running one step of the trial event loop.""" pass - def on_step_end(self): + def on_step_end(self, trial_runner): """A hook called after running one step of the trial event loop.""" pass diff --git a/python/ray/tune/trial_runner.py b/python/ray/tune/trial_runner.py index c885f82b9..31c2c87bd 100644 --- a/python/ray/tune/trial_runner.py +++ b/python/ray/tune/trial_runner.py @@ -326,7 +326,7 @@ class TrialRunner(object): if self.is_finished(): raise TuneError("Called step when all trials finished?") with warn_if_slow("on_step_begin"): - self.trial_executor.on_step_begin() + self.trial_executor.on_step_begin(self) next_trial = self._get_next_trial() # blocking if next_trial is not None: with warn_if_slow("start_trial"): @@ -367,7 +367,7 @@ class TrialRunner(object): if self.is_finished(): self._server.shutdown() with warn_if_slow("on_step_end"): - self.trial_executor.on_step_end() + self.trial_executor.on_step_end(self) def get_trial(self, tid): trial = [t for t in self._trials if t.trial_id == tid]