[tune] Tweaks to Trainable and Verbosity (#2889)

2026-06-27 19:32:11 +08:00 · 2018-10-11 23:42:13 -07:00
parent 828fe24b39
commit f9b58d7b02
17 changed files with 160 additions and 47 deletions
@@ -25,10 +25,13 @@ By default, Tune uses the `default search space and variant generation process <
    :noindex:


+Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format.
+
 HyperOpt Search (Tree-structured Parzen Estimators)
 ---------------------------------------------------

-The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization.
+The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using HyperOptSearch.
+
 In order to use this search algorithm, you will need to install HyperOpt via the following command:

 .. code-block:: bash
@@ -47,7 +50,6 @@ An example of this can be found in `hyperopt_example.py <https://github.com/ray-
    :show-inheritance:
    :noindex:

-
 Contributing a New Algorithm
 ----------------------------

@@ -134,6 +134,9 @@ Tune Search Space (Default)

 You can use ``tune.grid_search`` to specify an axis of a grid search. By default, Tune also supports sampling parameters from user-specified lambda functions, which can be used independently or in combination with grid search.

+.. note::
+    If you specify an explicit Search Algorithm such as any SuggestionAlgorithm, you may not be able to specify lambdas or grid search with this interface, as the search algorithm may require a different search space declaration.
+
 The following shows grid search over two nested parameters combined with random sampling from two lambda functions, generating 9 different trials. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions.

 .. code-block:: python
@@ -157,9 +160,6 @@ The following shows grid search over two nested parameters combined with random
 .. note::
    Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it.

-.. warning::
-    If you specify a Search Algorithm, you may not be able to use this feature, as the algorithm may require a different search space declaration.
-
 For more information on variant generation, see `basic_variant.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/suggest/basic_variant.py>`__.

 Sampling Multiple Times
@@ -6,5 +6,6 @@ FROM ray-project/deploy
 RUN conda install -y numpy
 RUN apt-get install -y zlib1g-dev
 RUN pip install gym[atari] opencv-python==3.2.0.8 tensorflow lz4 keras
+RUN pip install -U h5py  # Mutes FutureWarnings
 RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
 RUN conda install pytorch-cpu torchvision-cpu -c pytorch
@@ -234,10 +234,10 @@ class Agent(Trainable):

        return Trainable.train(self)

-    def _setup(self):
+    def _setup(self, config):
        env = self._env_id
        if env:
-            self.config["env"] = env
+            config["env"] = env
            if _global_registry.contains(ENV_CREATOR, env):
                self.env_creator = _global_registry.get(ENV_CREATOR, env)
            else:
@@ -248,7 +248,7 @@ class Agent(Trainable):

        # Merge the supplied config with the class default
        merged_config = self._default_config.copy()
-        merged_config = deep_update(merged_config, self.config,
+        merged_config = deep_update(merged_config, config,
                                    self._allow_unknown_configs,
                                    self._allow_unknown_subkeys)
        self.config = merged_config
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
    maximum reward value reached.
    """

-    def _setup(self):
+    def _setup(self, config):
        self.timestep = 0

    def _train(self):
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
    maximum reward value reached.
    """

-    def _setup(self):
+    def _setup(self, config):
        self.timestep = 0

    def _train(self):
@@ -80,9 +80,9 @@ class Net(nn.Module):


 class TrainMNIST(Trainable):
-    def _setup(self):
-        args = self.config.pop("args")
-        vars(args).update(self.config)
+    def _setup(self, config):
+        args = config.pop("args")
+        vars(args).update(config)
        args.cuda = not args.no_cuda and torch.cuda.is_available()

        torch.manual_seed(args.seed)
@@ -18,7 +18,7 @@ from ray.tune.schedulers import PopulationBasedTraining
 class MyTrainableClass(Trainable):
    """Fake agent whose learning rate is determined by dummy factors."""

-    def _setup(self):
+    def _setup(self, config):
        self.timestep = 0
        self.current_value = 0.0

@@ -105,7 +105,7 @@ class Cifar10Model(Trainable):
        model = Model(inputs=x, outputs=y, name="model1")
        return model

-    def _setup(self):
+    def _setup(self, config):
        self.train_data, self.test_data = self._read_data()
        x_train = self.train_data[0]
        model = self._build_model(x_train.shape[1:])
@@ -128,7 +128,7 @@ def bias_variable(shape):
 class TrainMNIST(Trainable):
    """Example MNIST trainable."""

-    def _setup(self):
+    def _setup(self, config):
        global activation_fn

        self.timestep = 0
@@ -148,7 +148,7 @@ class TrainMNIST(Trainable):
        self.x = tf.placeholder(tf.float32, [None, 784])
        self.y_ = tf.placeholder(tf.float32, [None, 10])

-        activation_fn = getattr(tf.nn, self.config['activation'])
+        activation_fn = getattr(tf.nn, config['activation'])

        # Build the graph for the deep net
        y_conv, self.keep_prob = setupCNN(self.x)
@@ -160,7 +160,7 @@ class TrainMNIST(Trainable):

        with tf.name_scope('adam_optimizer'):
            train_step = tf.train.AdamOptimizer(
-                self.config['learning_rate']).minimize(cross_entropy)
+                config['learning_rate']).minimize(cross_entropy)

        self.train_step = train_step

@@ -90,10 +90,10 @@ class FunctionRunner(Trainable):
    _name = "func"
    _default_config = DEFAULT_CONFIG

-    def _setup(self):
+    def _setup(self, config):
        entrypoint = self._trainable_func()
        self._status_reporter = StatusReporter()
-        scrubbed_config = self.config.copy()
+        scrubbed_config = config.copy()
        for k in self._default_config:
            if k in scrubbed_config:
                del scrubbed_config[k]
@@ -39,7 +39,8 @@ TIME_TOTAL_S = "time_total_s"
 TRAINING_ITERATION = "training_iteration"

 # Where Tune writes result files by default
-DEFAULT_RESULTS_DIR = os.path.expanduser("~/ray_results")
+DEFAULT_RESULTS_DIR = (os.environ.get("TUNE_RESULT_DIR")
+                       or os.path.expanduser("~/ray_results"))

 # Meta file about status under each experiment directory, can be
 # parsed by automlboard if exists.
@@ -50,7 +50,10 @@ class HyperBandScheduler(FIFOScheduler):
    For example, to limit trials to 10 minutes and early stop based on the
    `episode_mean_reward` attr, construct:

-    ``HyperBand('time_total_s', 'episode_reward_mean', 600)``
+    ``HyperBand('time_total_s', 'episode_reward_mean', max_t=600)``
+
+    Note that Tune's stopping criteria will be applied in conjunction with
+    HyperBand's early stopping mechanisms.

    See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html

@@ -4,7 +4,11 @@ from __future__ import print_function

 import numpy as np
 import copy
+import logging
+
 try:
+    hyperopt_logger = logging.getLogger("hyperopt")
+    hyperopt_logger.setLevel(logging.WARNING)
    import hyperopt as hpo
 except Exception as e:
    hpo = None
@@ -47,7 +51,6 @@ class HyperOptSearch(SuggestionAlgorithm):
        >>> }
        >>> algo = HyperOptSearch(
        >>>     space, max_concurrent=4, reward_attr="neg_mean_loss")
-        >>> algo.add_configurations(config)
    """

    def __init__(self,
@@ -433,6 +433,71 @@ class TrainableFunctionApiTest(unittest.TestCase):
        self.assertEqual(trial3.last_result[TIMESTEPS_TOTAL], 5)
        self.assertEqual(trial3.last_result["timesteps_this_iter"], 0)

+    def testCheckpointDict(self):
+        class TestTrain(Trainable):
+            def _setup(self, config):
+                self.state = {"hi": 1}
+
+            def _train(self):
+                return dict(timesteps_this_iter=1, done=True)
+
+            def _save(self, path):
+                return self.state
+
+            def _restore(self, state):
+                self.state = state
+
+        test_trainable = TestTrain()
+        result = test_trainable.save()
+        test_trainable.state["hi"] = 2
+        test_trainable.restore(result)
+        self.assertEqual(test_trainable.state["hi"], 1)
+
+        trials = run_experiments({
+            "foo": {
+                "run": TestTrain,
+                "checkpoint_at_end": True
+            }
+        })
+        for trial in trials:
+            self.assertEqual(trial.status, Trial.TERMINATED)
+            self.assertTrue(trial.has_checkpoint())
+
+    def testMultipleCheckpoints(self):
+        class TestTrain(Trainable):
+            def _setup(self, config):
+                self.state = {"hi": 1, "iter": 0}
+
+            def _train(self):
+                self.state["iter"] += 1
+                return dict(timesteps_this_iter=1, done=True)
+
+            def _save(self, path):
+                return self.state
+
+            def _restore(self, state):
+                self.state = state
+
+        test_trainable = TestTrain()
+        checkpoint_1 = test_trainable.save()
+        test_trainable.train()
+        checkpoint_2 = test_trainable.save()
+        self.assertNotEqual(checkpoint_1, checkpoint_2)
+        test_trainable.restore(checkpoint_2)
+        self.assertEqual(test_trainable.state["iter"], 1)
+        test_trainable.restore(checkpoint_1)
+        self.assertEqual(test_trainable.state["iter"], 0)
+
+        trials = run_experiments({
+            "foo": {
+                "run": TestTrain,
+                "checkpoint_at_end": True
+            }
+        })
+        for trial in trials:
+            self.assertEqual(trial.status, Trial.TERMINATED)
+            self.assertTrue(trial.has_checkpoint())
+

 class RunExperimentTest(unittest.TestCase):
    def setUp(self):
@@ -4,6 +4,7 @@ from __future__ import print_function

 from datetime import datetime

+import copy
 import gzip
 import io
 import logging
@@ -83,7 +84,7 @@ class Trainable(object):
        self._timesteps_since_restore = 0
        self._iterations_since_restore = 0
        self._restored = False
-        self._setup()
+        self._setup(copy.deepcopy(self.config))
        self._local_ip = ray.services.get_node_ip_address()

    @classmethod
@@ -143,6 +144,8 @@ class Trainable(object):

        start = time.time()
        result = self._train()
+        assert isinstance(result, dict), "_train() needs to return a dict."
+
        result = result.copy()

        self._iteration += 1
@@ -211,11 +214,27 @@ class Trainable(object):
            Checkpoint path that may be passed to restore().
        """

-        checkpoint_path = self._save(checkpoint_dir or self.logdir)
-        pickle.dump([
-            self._experiment_id, self._iteration, self._timesteps_total,
-            self._time_total, self._episodes_total
-        ], open(checkpoint_path + ".tune_metadata", "wb"))
+        checkpoint_path = tempfile.mkdtemp(
+            prefix="checkpoint_{}".format(self._iteration),
+            dir=checkpoint_dir or self.logdir)
+        checkpoint = self._save(checkpoint_path)
+        saved_as_dict = False
+        if isinstance(checkpoint, str):
+            checkpoint_path = checkpoint
+        elif isinstance(checkpoint, dict):
+            saved_as_dict = True
+            pickle.dump(checkpoint, open(checkpoint_path + ".tune_state",
+                                         "wb"))
+        else:
+            raise ValueError("Return value from `_save` must be dict or str.")
+        pickle.dump({
+            "experiment_id": self._experiment_id,
+            "iteration": self._iteration,
+            "timesteps_total": self._timesteps_total,
+            "time_total": self._time_total,
+            "episodes_total": self._episodes_total,
+            "saved_as_dict": saved_as_dict
+        }, open(checkpoint_path + ".tune_metadata", "wb"))
        return checkpoint_path

    def save_to_object(self):
@@ -259,13 +278,19 @@ class Trainable(object):
        This method restores additional metadata saved with the checkpoint.
        """

-        self._restore(checkpoint_path)
        metadata = pickle.load(open(checkpoint_path + ".tune_metadata", "rb"))
-        self._experiment_id = metadata[0]
-        self._iteration = metadata[1]
-        self._timesteps_total = metadata[2]
-        self._time_total = metadata[3]
-        self._episodes_total = metadata[4]
+        self._experiment_id = metadata["experiment_id"]
+        self._iteration = metadata["iteration"]
+        self._timesteps_total = metadata["timesteps_total"]
+        self._time_total = metadata["time_total"]
+        self._episodes_total = metadata["episodes_total"]
+        saved_as_dict = metadata["saved_as_dict"]
+        if saved_as_dict:
+            with open(checkpoint_path + ".tune_state", "rb") as loaded_state:
+                checkpoint_dict = pickle.load(loaded_state)
+            self._restore(checkpoint_dict)
+        else:
+            self._restore(checkpoint_path)
        self._restored = True

    def restore_from_object(self, obj):
@@ -321,27 +346,34 @@ class Trainable(object):
                can be stored.

        Returns:
-            Checkpoint path that may be passed to restore(). Typically
-                would default to `checkpoint_dir`.
+            checkpoint (str | dict): If string, the return value is
+                expected to be the checkpoint path that will be passed to
+                `_restore()`. If dict, the return value will be automatically
+                serialized by Tune and passed to `_restore()`.
+
+        Examples:
+            >>> checkpoint_data = trainable._save(checkpoint_dir)
+            >>> trainable2._restore(checkpoint_data)
        """

        raise NotImplementedError

-    def _restore(self, checkpoint_path):
+    def _restore(self, checkpoint):
        """Subclasses should override this to implement restore().

        Args:
-            checkpoint_path (str): The directory where the checkpoint
-                is stored.
+            checkpoint (str | dict): Value as returned by `_save`.
+                If a string, then it is the checkpoint path.
        """

        raise NotImplementedError

-    def _setup(self):
+    def _setup(self, config):
        """Subclasses should override this for custom initialization.

-        Subclasses can access the hyperparameter configuration via
-        ``self.config``.
+        Args:
+            config (dict): Hyperparameters and other configs given.
+                Copy of `self.config`.
        """
        pass

@@ -8,6 +8,7 @@ import logging
 import time
 import tempfile
 import os
+from numbers import Number

 import ray
 from ray.tune import TuneError
@@ -33,12 +34,14 @@ class Resources(
        namedtuple("Resources", ["cpu", "gpu", "extra_cpu", "extra_gpu"])):
    """Ray resources required to schedule a trial.

+    TODO: Custom resources.
+
    Attributes:
-        cpu (int): Number of CPUs to allocate to the trial.
-        gpu (int): Number of GPUs to allocate to the trial.
-        extra_cpu (int): Extra CPUs to reserve in case the trial needs to
+        cpu (float): Number of CPUs to allocate to the trial.
+        gpu (float): Number of GPUs to allocate to the trial.
+        extra_cpu (float): Extra CPUs to reserve in case the trial needs to
            launch additional Ray actors that use CPUs.
-        extra_gpu (int): Extra GPUs to reserve in case the trial needs to
+        extra_gpu (float): Extra GPUs to reserve in case the trial needs to
            launch additional Ray actors that use GPUs.

    """
@@ -46,6 +49,9 @@ class Resources(
    __slots__ = ()

    def __new__(cls, cpu, gpu, extra_cpu=0, extra_gpu=0):
+        for entry in [cpu, gpu, extra_cpu, extra_gpu]:
+            assert isinstance(entry, Number), "Improper resource value."
+            assert entry >= 0, "Resource cannot be negative."
        return super(Resources, cls).__new__(cls, cpu, gpu, extra_cpu,
                                             extra_gpu)