From f9b58d7b0252f312e7b0983ffefabf68d3e7a939 Mon Sep 17 00:00:00 2001
From: Richard Liaw <rliaw@berkeley.edu>
Date: Thu, 11 Oct 2018 23:42:13 -0700
Subject: [PATCH] [tune] Tweaks to Trainable and Verbosity (#2889)

---
 doc/source/tune-searchalg.rst                 |  6 +-
 doc/source/tune-usage.rst                     |  6 +-
 docker/examples/Dockerfile                    |  1 +
 python/ray/rllib/agents/agent.py              |  6 +-
 .../tune/examples/async_hyperband_example.py  |  2 +-
 python/ray/tune/examples/hyperband_example.py |  2 +-
 .../tune/examples/mnist_pytorch_trainable.py  |  6 +-
 python/ray/tune/examples/pbt_example.py       |  2 +-
 .../examples/pbt_tune_cifar10_with_keras.py   |  2 +-
 .../tune/examples/tune_mnist_ray_hyperband.py |  6 +-
 python/ray/tune/function_runner.py            |  4 +-
 python/ray/tune/result.py                     |  3 +-
 python/ray/tune/schedulers/hyperband.py       |  5 +-
 python/ray/tune/suggest/hyperopt.py           |  5 +-
 python/ray/tune/test/trial_runner_test.py     | 65 +++++++++++++++++
 python/ray/tune/trainable.py                  | 72 +++++++++++++------
 python/ray/tune/trial.py                      | 14 ++--
 17 files changed, 160 insertions(+), 47 deletions(-)

diff --git a/doc/source/tune-searchalg.rst b/doc/source/tune-searchalg.rst
index 97e8ce1bc..e8e5b0fa6 100644
--- a/doc/source/tune-searchalg.rst
+++ b/doc/source/tune-searchalg.rst
@@ -25,10 +25,13 @@ By default, Tune uses the `default search space and variant generation process <
     :noindex:
 
 
+Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format.
+
 HyperOpt Search (Tree-structured Parzen Estimators)
 ---------------------------------------------------
 
-The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization.
+The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using ``HyperOptSearch``.
+
 In order to use this search algorithm, you will need to install HyperOpt via the following command:
 
 .. code-block:: bash
@@ -47,7 +50,6 @@ An example of this can be found in `hyperopt_example.py <https://github.com/ray-
     :show-inheritance:
     :noindex:
 
-
 Contributing a New Algorithm
 ----------------------------
 
diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst
index d485dcdd0..b52241f03 100644
--- a/doc/source/tune-usage.rst
+++ b/doc/source/tune-usage.rst
@@ -134,6 +134,9 @@ Tune Search Space (Default)
 
 You can use ``tune.grid_search`` to specify an axis of a grid search. By default, Tune also supports sampling parameters from user-specified lambda functions, which can be used independently or in combination with grid search.
 
+.. note::
+    If you specify an explicit Search Algorithm (for example, any ``SuggestionAlgorithm``), you may not be able to specify lambdas or grid search with this interface, as the search algorithm may require a different search space declaration.
+
 The following shows grid search over two nested parameters combined with random sampling from two lambda functions, generating 9 different trials. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions.
 
 .. code-block:: python
@@ -157,9 +160,6 @@ The following shows grid search over two nested parameters combined with random
 .. note::
     Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it.
 
-.. warning::
-    If you specify a Search Algorithm, you may not be able to use this feature, as the algorithm may require a different search space declaration.
-
 For more information on variant generation, see `basic_variant.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/suggest/basic_variant.py>`__.
 
 Sampling Multiple Times
diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile
index d4e6c34b2..80685b7d3 100644
--- a/docker/examples/Dockerfile
+++ b/docker/examples/Dockerfile
@@ -6,5 +6,6 @@ FROM ray-project/deploy
 RUN conda install -y numpy
 RUN apt-get install -y zlib1g-dev
 RUN pip install gym[atari] opencv-python==3.2.0.8 tensorflow lz4 keras
+RUN pip install -U h5py  # Mutes FutureWarnings
 RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
 RUN conda install pytorch-cpu torchvision-cpu -c pytorch
diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py
index 030ae6424..8041eba06 100644
--- a/python/ray/rllib/agents/agent.py
+++ b/python/ray/rllib/agents/agent.py
@@ -234,10 +234,10 @@ class Agent(Trainable):
 
         return Trainable.train(self)
 
-    def _setup(self):
+    def _setup(self, config):
         env = self._env_id
         if env:
-            self.config["env"] = env
+            config["env"] = env
             if _global_registry.contains(ENV_CREATOR, env):
                 self.env_creator = _global_registry.get(ENV_CREATOR, env)
             else:
@@ -248,7 +248,7 @@ class Agent(Trainable):
 
         # Merge the supplied config with the class default
         merged_config = self._default_config.copy()
-        merged_config = deep_update(merged_config, self.config,
+        merged_config = deep_update(merged_config, config,
                                     self._allow_unknown_configs,
                                     self._allow_unknown_subkeys)
         self.config = merged_config
diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py
index 2c368b4e3..e07f11b32 100644
--- a/python/ray/tune/examples/async_hyperband_example.py
+++ b/python/ray/tune/examples/async_hyperband_example.py
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
     maximum reward value reached.
     """
 
-    def _setup(self):
+    def _setup(self, config):
         self.timestep = 0
 
     def _train(self):
diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py
index 94f603e82..baf133b41 100755
--- a/python/ray/tune/examples/hyperband_example.py
+++ b/python/ray/tune/examples/hyperband_example.py
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
     maximum reward value reached.
     """
 
-    def _setup(self):
+    def _setup(self, config):
         self.timestep = 0
 
     def _train(self):
diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py
index 0d23c0cc2..2c0c68bce 100644
--- a/python/ray/tune/examples/mnist_pytorch_trainable.py
+++ b/python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -80,9 +80,9 @@ class Net(nn.Module):
 
 
 class TrainMNIST(Trainable):
-    def _setup(self):
-        args = self.config.pop("args")
-        vars(args).update(self.config)
+    def _setup(self, config):
+        args = config.pop("args")
+        vars(args).update(config)
         args.cuda = not args.no_cuda and torch.cuda.is_available()
 
         torch.manual_seed(args.seed)
diff --git a/python/ray/tune/examples/pbt_example.py b/python/ray/tune/examples/pbt_example.py
index c958d2512..3433e82f9 100755
--- a/python/ray/tune/examples/pbt_example.py
+++ b/python/ray/tune/examples/pbt_example.py
@@ -18,7 +18,7 @@ from ray.tune.schedulers import PopulationBasedTraining
 class MyTrainableClass(Trainable):
     """Fake agent whose learning rate is determined by dummy factors."""
 
-    def _setup(self):
+    def _setup(self, config):
         self.timestep = 0
         self.current_value = 0.0
 
diff --git a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
index 28575f546..63e3d00e8 100755
--- a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
+++ b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
@@ -105,7 +105,7 @@ class Cifar10Model(Trainable):
         model = Model(inputs=x, outputs=y, name="model1")
         return model
 
-    def _setup(self):
+    def _setup(self, config):
         self.train_data, self.test_data = self._read_data()
         x_train = self.train_data[0]
         model = self._build_model(x_train.shape[1:])
diff --git a/python/ray/tune/examples/tune_mnist_ray_hyperband.py b/python/ray/tune/examples/tune_mnist_ray_hyperband.py
index 29939ff24..9dbc46775 100755
--- a/python/ray/tune/examples/tune_mnist_ray_hyperband.py
+++ b/python/ray/tune/examples/tune_mnist_ray_hyperband.py
@@ -128,7 +128,7 @@ def bias_variable(shape):
 class TrainMNIST(Trainable):
     """Example MNIST trainable."""
 
-    def _setup(self):
+    def _setup(self, config):
         global activation_fn
 
         self.timestep = 0
@@ -148,7 +148,7 @@ class TrainMNIST(Trainable):
         self.x = tf.placeholder(tf.float32, [None, 784])
         self.y_ = tf.placeholder(tf.float32, [None, 10])
 
-        activation_fn = getattr(tf.nn, self.config['activation'])
+        activation_fn = getattr(tf.nn, config['activation'])
 
         # Build the graph for the deep net
         y_conv, self.keep_prob = setupCNN(self.x)
@@ -160,7 +160,7 @@ class TrainMNIST(Trainable):
 
         with tf.name_scope('adam_optimizer'):
             train_step = tf.train.AdamOptimizer(
-                self.config['learning_rate']).minimize(cross_entropy)
+                config['learning_rate']).minimize(cross_entropy)
 
         self.train_step = train_step
 
diff --git a/python/ray/tune/function_runner.py b/python/ray/tune/function_runner.py
index d1704b6aa..1b93d3b6c 100644
--- a/python/ray/tune/function_runner.py
+++ b/python/ray/tune/function_runner.py
@@ -90,10 +90,10 @@ class FunctionRunner(Trainable):
     _name = "func"
     _default_config = DEFAULT_CONFIG
 
-    def _setup(self):
+    def _setup(self, config):
         entrypoint = self._trainable_func()
         self._status_reporter = StatusReporter()
-        scrubbed_config = self.config.copy()
+        scrubbed_config = config.copy()
         for k in self._default_config:
             if k in scrubbed_config:
                 del scrubbed_config[k]
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py
index ec307eaed..5b7ade11f 100644
--- a/python/ray/tune/result.py
+++ b/python/ray/tune/result.py
@@ -39,7 +39,8 @@ TIME_TOTAL_S = "time_total_s"
 TRAINING_ITERATION = "training_iteration"
 
 # Where Tune writes result files by default
-DEFAULT_RESULTS_DIR = os.path.expanduser("~/ray_results")
+DEFAULT_RESULTS_DIR = (os.environ.get("TUNE_RESULT_DIR")
+                       or os.path.expanduser("~/ray_results"))
 
 # Meta file about status under each experiment directory, can be
 # parsed by automlboard if exists.
diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py
index 7e2f8f27e..71c69b306 100644
--- a/python/ray/tune/schedulers/hyperband.py
+++ b/python/ray/tune/schedulers/hyperband.py
@@ -50,7 +50,10 @@ class HyperBandScheduler(FIFOScheduler):
     For example, to limit trials to 10 minutes and early stop based on the
     `episode_mean_reward` attr, construct:
 
-    ``HyperBand('time_total_s', 'episode_reward_mean', 600)``
+    ``HyperBand('time_total_s', 'episode_reward_mean', max_t=600)``
+
+    Note that Tune's stopping criteria will be applied in conjunction with
+    HyperBand's early stopping mechanisms.
 
     See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html
 
diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py
index 45fe9753e..9173b56cc 100644
--- a/python/ray/tune/suggest/hyperopt.py
+++ b/python/ray/tune/suggest/hyperopt.py
@@ -4,7 +4,11 @@ from __future__ import print_function
 
 import numpy as np
 import copy
+import logging
+
 try:
+    hyperopt_logger = logging.getLogger("hyperopt")
+    hyperopt_logger.setLevel(logging.WARNING)
     import hyperopt as hpo
 except Exception as e:
     hpo = None
@@ -47,7 +51,6 @@ class HyperOptSearch(SuggestionAlgorithm):
         >>> }
         >>> algo = HyperOptSearch(
         >>>     space, max_concurrent=4, reward_attr="neg_mean_loss")
-        >>> algo.add_configurations(config)
     """
 
     def __init__(self,
diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py
index 1e4c0509d..65b8fbe36 100644
--- a/python/ray/tune/test/trial_runner_test.py
+++ b/python/ray/tune/test/trial_runner_test.py
@@ -433,6 +433,71 @@ class TrainableFunctionApiTest(unittest.TestCase):
         self.assertEqual(trial3.last_result[TIMESTEPS_TOTAL], 5)
         self.assertEqual(trial3.last_result["timesteps_this_iter"], 0)
 
+    def testCheckpointDict(self):
+        class TestTrain(Trainable):
+            def _setup(self, config):
+                self.state = {"hi": 1}
+
+            def _train(self):
+                return dict(timesteps_this_iter=1, done=True)
+
+            def _save(self, path):
+                return self.state
+
+            def _restore(self, state):
+                self.state = state
+
+        test_trainable = TestTrain()
+        result = test_trainable.save()
+        test_trainable.state["hi"] = 2
+        test_trainable.restore(result)
+        self.assertEqual(test_trainable.state["hi"], 1)
+
+        trials = run_experiments({
+            "foo": {
+                "run": TestTrain,
+                "checkpoint_at_end": True
+            }
+        })
+        for trial in trials:
+            self.assertEqual(trial.status, Trial.TERMINATED)
+            self.assertTrue(trial.has_checkpoint())
+
+    def testMultipleCheckpoints(self):
+        class TestTrain(Trainable):
+            def _setup(self, config):
+                self.state = {"hi": 1, "iter": 0}
+
+            def _train(self):
+                self.state["iter"] += 1
+                return dict(timesteps_this_iter=1, done=True)
+
+            def _save(self, path):
+                return self.state
+
+            def _restore(self, state):
+                self.state = state
+
+        test_trainable = TestTrain()
+        checkpoint_1 = test_trainable.save()
+        test_trainable.train()
+        checkpoint_2 = test_trainable.save()
+        self.assertNotEqual(checkpoint_1, checkpoint_2)
+        test_trainable.restore(checkpoint_2)
+        self.assertEqual(test_trainable.state["iter"], 1)
+        test_trainable.restore(checkpoint_1)
+        self.assertEqual(test_trainable.state["iter"], 0)
+
+        trials = run_experiments({
+            "foo": {
+                "run": TestTrain,
+                "checkpoint_at_end": True
+            }
+        })
+        for trial in trials:
+            self.assertEqual(trial.status, Trial.TERMINATED)
+            self.assertTrue(trial.has_checkpoint())
+
 
 class RunExperimentTest(unittest.TestCase):
     def setUp(self):
diff --git a/python/ray/tune/trainable.py b/python/ray/tune/trainable.py
index 1e537d26d..6c8b02cf0 100644
--- a/python/ray/tune/trainable.py
+++ b/python/ray/tune/trainable.py
@@ -4,6 +4,7 @@ from __future__ import print_function
 
 from datetime import datetime
 
+import copy
 import gzip
 import io
 import logging
@@ -83,7 +84,7 @@ class Trainable(object):
         self._timesteps_since_restore = 0
         self._iterations_since_restore = 0
         self._restored = False
-        self._setup()
+        self._setup(copy.deepcopy(self.config))
         self._local_ip = ray.services.get_node_ip_address()
 
     @classmethod
@@ -143,6 +144,8 @@ class Trainable(object):
 
         start = time.time()
         result = self._train()
+        assert isinstance(result, dict), "_train() needs to return a dict."
+
         result = result.copy()
 
         self._iteration += 1
@@ -211,11 +214,27 @@ class Trainable(object):
             Checkpoint path that may be passed to restore().
         """
 
-        checkpoint_path = self._save(checkpoint_dir or self.logdir)
-        pickle.dump([
-            self._experiment_id, self._iteration, self._timesteps_total,
-            self._time_total, self._episodes_total
-        ], open(checkpoint_path + ".tune_metadata", "wb"))
+        checkpoint_path = tempfile.mkdtemp(
+            prefix="checkpoint_{}".format(self._iteration),
+            dir=checkpoint_dir or self.logdir)
+        checkpoint = self._save(checkpoint_path)
+        saved_as_dict = False
+        if isinstance(checkpoint, str):
+            checkpoint_path = checkpoint
+        elif isinstance(checkpoint, dict):
+            saved_as_dict = True
+            pickle.dump(checkpoint, open(checkpoint_path + ".tune_state",
+                                         "wb"))
+        else:
+            raise ValueError("Return value from `_save` must be dict or str.")
+        pickle.dump({
+            "experiment_id": self._experiment_id,
+            "iteration": self._iteration,
+            "timesteps_total": self._timesteps_total,
+            "time_total": self._time_total,
+            "episodes_total": self._episodes_total,
+            "saved_as_dict": saved_as_dict
+        }, open(checkpoint_path + ".tune_metadata", "wb"))
         return checkpoint_path
 
     def save_to_object(self):
@@ -259,13 +278,19 @@ class Trainable(object):
         This method restores additional metadata saved with the checkpoint.
         """
 
-        self._restore(checkpoint_path)
         metadata = pickle.load(open(checkpoint_path + ".tune_metadata", "rb"))
-        self._experiment_id = metadata[0]
-        self._iteration = metadata[1]
-        self._timesteps_total = metadata[2]
-        self._time_total = metadata[3]
-        self._episodes_total = metadata[4]
+        self._experiment_id = metadata["experiment_id"]
+        self._iteration = metadata["iteration"]
+        self._timesteps_total = metadata["timesteps_total"]
+        self._time_total = metadata["time_total"]
+        self._episodes_total = metadata["episodes_total"]
+        saved_as_dict = metadata["saved_as_dict"]
+        if saved_as_dict:
+            with open(checkpoint_path + ".tune_state", "rb") as loaded_state:
+                checkpoint_dict = pickle.load(loaded_state)
+            self._restore(checkpoint_dict)
+        else:
+            self._restore(checkpoint_path)
         self._restored = True
 
     def restore_from_object(self, obj):
@@ -321,27 +346,34 @@ class Trainable(object):
                 can be stored.
 
         Returns:
-            Checkpoint path that may be passed to restore(). Typically
-                would default to `checkpoint_dir`.
+            checkpoint (str | dict): If string, the return value is
+                expected to be the checkpoint path that will be passed to
+                `_restore()`. If dict, the return value will be automatically
+                serialized by Tune and passed to `_restore()`.
+
+        Examples:
+            >>> checkpoint_data = trainable._save(checkpoint_dir)
+            >>> trainable2._restore(checkpoint_data)
         """
 
         raise NotImplementedError
 
-    def _restore(self, checkpoint_path):
+    def _restore(self, checkpoint):
         """Subclasses should override this to implement restore().
 
         Args:
-            checkpoint_path (str): The directory where the checkpoint
-                is stored.
+            checkpoint (str | dict): Value as returned by `_save`.
+                If a string, then it is the checkpoint path.
         """
 
         raise NotImplementedError
 
-    def _setup(self):
+    def _setup(self, config):
         """Subclasses should override this for custom initialization.
 
-        Subclasses can access the hyperparameter configuration via
-        ``self.config``.
+        Args:
+            config (dict): Hyperparameters and other configs given.
+                Copy of `self.config`.
         """
         pass
 
diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py
index 98fcbc6d5..59559ebbe 100644
--- a/python/ray/tune/trial.py
+++ b/python/ray/tune/trial.py
@@ -8,6 +8,7 @@ import logging
 import time
 import tempfile
 import os
+from numbers import Number
 
 import ray
 from ray.tune import TuneError
@@ -33,12 +34,14 @@ class Resources(
         namedtuple("Resources", ["cpu", "gpu", "extra_cpu", "extra_gpu"])):
     """Ray resources required to schedule a trial.
 
+    TODO: Custom resources.
+
     Attributes:
-        cpu (int): Number of CPUs to allocate to the trial.
-        gpu (int): Number of GPUs to allocate to the trial.
-        extra_cpu (int): Extra CPUs to reserve in case the trial needs to
+        cpu (float): Number of CPUs to allocate to the trial.
+        gpu (float): Number of GPUs to allocate to the trial.
+        extra_cpu (float): Extra CPUs to reserve in case the trial needs to
             launch additional Ray actors that use CPUs.
-        extra_gpu (int): Extra GPUs to reserve in case the trial needs to
+        extra_gpu (float): Extra GPUs to reserve in case the trial needs to
             launch additional Ray actors that use GPUs.
 
     """
@@ -46,6 +49,9 @@ class Resources(
     __slots__ = ()
 
     def __new__(cls, cpu, gpu, extra_cpu=0, extra_gpu=0):
+        for entry in [cpu, gpu, extra_cpu, extra_gpu]:
+            assert isinstance(entry, Number), "Improper resource value."
+            assert entry >= 0, "Resource cannot be negative."
         return super(Resources, cls).__new__(cls, cpu, gpu, extra_cpu,
                                              extra_gpu)