From e00071721af04d2e7b3c8bd721bc4cc2809d68b0 Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Sat, 21 Sep 2019 17:01:14 -0700
Subject: [PATCH] [tune] tf2.0 testing and supporting callables (#5738)

---
 ci/jenkins_tests/run_tune_tests.sh           |  8 +++++++
 python/ray/tune/examples/tune_mnist_keras.py | 17 +++++++-------
 python/ray/tune/experiment.py                | 24 +++++++++-----------
 python/ray/tune/integration/keras.py         |  6 ++++-
 python/ray/tune/logger.py                    |  6 ++---
 python/ray/tune/tests/test_trial_runner.py   |  9 +++++++-
 6 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh
index 326cf5543..c1e348254 100755
--- a/ci/jenkins_tests/run_tune_tests.sh
+++ b/ci/jenkins_tests/run_tune_tests.sh
@@ -62,6 +62,14 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
     python /ray/python/ray/tune/examples/async_hyperband_example.py \
     --smoke-test
 
+$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    pip install tensorflow==2.0.0rc1 && python /ray/python/ray/tune/examples/async_hyperband_example.py \
+    --smoke-test
+
+$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    pip install tensorflow==1.15.0rc1 && python /ray/python/ray/tune/examples/async_hyperband_example.py \
+    --smoke-test
+
 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     python /ray/python/ray/tune/examples/tune_mnist_ray_hyperband.py \
     --smoke-test
diff --git a/python/ray/tune/examples/tune_mnist_keras.py b/python/ray/tune/examples/tune_mnist_keras.py
index ecd3c34bc..b3a13884a 100644
--- a/python/ray/tune/examples/tune_mnist_keras.py
+++ b/python/ray/tune/examples/tune_mnist_keras.py
@@ -4,13 +4,10 @@ from __future__ import print_function
 
 import argparse
 import numpy as np
-import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import (Dense, Dropout, Flatten, Conv2D, MaxPooling2D)
+from tensorflow.keras.datasets import mnist
 
 from ray.tune.integration.keras import TuneReporterCallback
-from ray.tune.examples.utils import get_mnist_data, set_keras_threads
+from ray.tune.examples.utils import get_mnist_data
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
@@ -19,7 +16,11 @@ args, _ = parser.parse_known_args()
 
 
 def train_mnist(config, reporter):
-    set_keras_threads(config["threads"])
+    # https://github.com/tensorflow/tensorflow/issues/32159
+    import tensorflow as tf
+    from tensorflow.keras.models import Sequential
+    from tensorflow.keras.layers import (Dense, Dropout, Flatten, Conv2D,
+                                         MaxPooling2D)
     batch_size = 128
     num_classes = 10
     epochs = 12
@@ -40,8 +41,8 @@ def train_mnist(config, reporter):
     model.add(Dense(num_classes, activation="softmax"))
 
     model.compile(
-        loss=keras.losses.categorical_crossentropy,
-        optimizer=keras.optimizers.SGD(
+        loss=tf.keras.losses.categorical_crossentropy,
+        optimizer=tf.keras.optimizers.SGD(
             lr=config["lr"], momentum=config["momentum"]),
         metrics=["accuracy"])
 
diff --git a/python/ray/tune/experiment.py b/python/ray/tune/experiment.py
index fe631cd66..1af9b043f 100644
--- a/python/ray/tune/experiment.py
+++ b/python/ray/tune/experiment.py
@@ -6,11 +6,11 @@ import copy
 import logging
 import os
 import six
-import types
 
 from ray.tune.error import TuneError
 from ray.tune.registry import register_trainable
 from ray.tune.result import DEFAULT_RESULTS_DIR
+from ray.tune.sample import sample_from
 
 logger = logging.getLogger(__name__)
 
@@ -145,8 +145,7 @@ class Experiment(object):
     def _register_if_needed(cls, run_object):
         """Registers Trainable or Function at runtime.
 
-        Assumes already registered if run_object is a string. Does not
-        register lambdas because they could be part of variant generation.
+        Assumes already registered if run_object is a string.
         Also, does not inspect interface of given run_object.
 
         Arguments:
@@ -160,17 +159,16 @@ class Experiment(object):
 
         if isinstance(run_object, six.string_types):
             return run_object
-        elif isinstance(run_object, types.FunctionType):
-            if run_object.__name__ == "<lambda>":
-                logger.warning(
-                    "Not auto-registering lambdas - resolving as variant.")
-                return run_object
-            else:
+        elif isinstance(run_object, sample_from):
+            logger.warning("Not registering trainable. Resolving as variant.")
+            return run_object
+        elif isinstance(run_object, type) or callable(run_object):
+            name = "DEFAULT"
+            if hasattr(run_object, "__name__"):
                 name = run_object.__name__
-                register_trainable(name, run_object)
-                return name
-        elif isinstance(run_object, type):
-            name = run_object.__name__
+            else:
+                logger.warning(
+                    "No name detected on trainable. Using {}.".format(name))
             register_trainable(name, run_object)
             return name
         else:
diff --git a/python/ray/tune/integration/keras.py b/python/ray/tune/integration/keras.py
index 587728ef6..0131dac5d 100644
--- a/python/ray/tune/integration/keras.py
+++ b/python/ray/tune/integration/keras.py
@@ -32,7 +32,11 @@ class TuneReporterCallback(keras.callbacks.Callback):
         for metric in list(logs):
             if "loss" in metric and "neg_" not in metric:
                 logs["neg_" + metric] = -logs[metric]
-        self.reporter(keras_info=logs, mean_accuracy=logs["acc"])
+        print(logs)
+        if "acc" in logs:
+            self.reporter(keras_info=logs, mean_accuracy=logs["acc"])
+        else:
+            self.reporter(keras_info=logs, mean_accuracy=logs.get("accuracy"))
 
     def on_epoch_end(self, batch, logs={}):
         if not self.freq == "epoch":
diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py
index ff58d4c20..393962532 100644
--- a/python/ray/tune/logger.py
+++ b/python/ray/tune/logger.py
@@ -155,7 +155,7 @@ def tf2_compat_logger(config, logdir, trial=None):
 
 
 class TF2Logger(Logger):
-    """TensorBoard Logger for TF version >= 1.14.
+    """TensorBoard Logger for TF version >= 2.0.0.
 
     Automatically flattens nested dicts to show on TensorBoard:
 
@@ -175,7 +175,7 @@ class TF2Logger(Logger):
             from tensorboard.plugins.hparams import api as hp
             self._context = context
             self._file_writer = tf.summary.create_file_writer(self.logdir)
-        with tf.device("/CPU:0"), self._context.eager_mode():
+        with tf.device("/CPU:0"):
             with tf.summary.record_if(True), self._file_writer.as_default():
                 step = result.get(
                     TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]
@@ -226,7 +226,7 @@ def to_tf_values(result, path):
 
 
 class TFLogger(Logger):
-    """TensorBoard Logger for TF version < 1.14.
+    """TensorBoard Logger for TF version < 2.0.0.
 
     Automatically flattens nested dicts to show on TensorBoard:
 
diff --git a/python/ray/tune/tests/test_trial_runner.py b/python/ray/tune/tests/test_trial_runner.py
index a80108fbd..f37fa327a 100644
--- a/python/ray/tune/tests/test_trial_runner.py
+++ b/python/ray/tune/tests/test_trial_runner.py
@@ -214,11 +214,15 @@ class TrainableFunctionApiTest(unittest.TestCase):
             pass
 
         register_trainable("foo", train)
+        Experiment("test", train)
         register_trainable("foo", B)
+        Experiment("test", B)
         self.assertRaises(TypeError, lambda: register_trainable("foo", B()))
+        self.assertRaises(TuneError, lambda: Experiment("foo", B()))
         self.assertRaises(TypeError, lambda: register_trainable("foo", A))
+        self.assertRaises(TypeError, lambda: Experiment("foo", A))
 
-    def testRegisterTrainableCallable(self):
+    def testTrainableCallable(self):
         def dummy_fn(config, reporter, steps):
             reporter(timesteps_total=steps, done=True)
 
@@ -232,6 +236,9 @@ class TrainableFunctionApiTest(unittest.TestCase):
         })
         self.assertEqual(trial.status, Trial.TERMINATED)
         self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], steps)
+        [trial] = tune.run(partial(dummy_fn, steps=steps)).trials
+        self.assertEqual(trial.status, Trial.TERMINATED)
+        self.assertEqual(trial.last_result[TIMESTEPS_TOTAL], steps)
 
     def testBuiltInTrainableResources(self):
         class B(Trainable):