From f9b58d7b0252f312e7b0983ffefabf68d3e7a939 Mon Sep 17 00:00:00 2001 From: Richard Liaw Date: Thu, 11 Oct 2018 23:42:13 -0700 Subject: [PATCH] [tune] Tweaks to Trainable and Verbosity (#2889) --- doc/source/tune-searchalg.rst | 6 +- doc/source/tune-usage.rst | 6 +- docker/examples/Dockerfile | 1 + python/ray/rllib/agents/agent.py | 6 +- .../tune/examples/async_hyperband_example.py | 2 +- python/ray/tune/examples/hyperband_example.py | 2 +- .../tune/examples/mnist_pytorch_trainable.py | 6 +- python/ray/tune/examples/pbt_example.py | 2 +- .../examples/pbt_tune_cifar10_with_keras.py | 2 +- .../tune/examples/tune_mnist_ray_hyperband.py | 6 +- python/ray/tune/function_runner.py | 4 +- python/ray/tune/result.py | 3 +- python/ray/tune/schedulers/hyperband.py | 5 +- python/ray/tune/suggest/hyperopt.py | 5 +- python/ray/tune/test/trial_runner_test.py | 65 +++++++++++++++++ python/ray/tune/trainable.py | 72 +++++++++++++------ python/ray/tune/trial.py | 14 ++-- 17 files changed, 160 insertions(+), 47 deletions(-) diff --git a/doc/source/tune-searchalg.rst b/doc/source/tune-searchalg.rst index 97e8ce1bc..e8e5b0fa6 100644 --- a/doc/source/tune-searchalg.rst +++ b/doc/source/tune-searchalg.rst @@ -25,10 +25,13 @@ By default, Tune uses the `default search space and variant generation process < :noindex: +Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format. + HyperOpt Search (Tree-structured Parzen Estimators) --------------------------------------------------- -The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt `__ to perform sequential model-based hyperparameter optimization. +The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt `__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using HyperOptSearch. + In order to use this search algorithm, you will need to install HyperOpt via the following command: .. code-block:: bash @@ -47,7 +50,6 @@ An example of this can be found in `hyperopt_example.py `__. Sampling Multiple Times diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index d4e6c34b2..80685b7d3 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -6,5 +6,6 @@ FROM ray-project/deploy RUN conda install -y numpy RUN apt-get install -y zlib1g-dev RUN pip install gym[atari] opencv-python==3.2.0.8 tensorflow lz4 keras +RUN pip install -U h5py # Mutes FutureWarnings RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git RUN conda install pytorch-cpu torchvision-cpu -c pytorch diff --git a/python/ray/rllib/agents/agent.py b/python/ray/rllib/agents/agent.py index 030ae6424..8041eba06 100644 --- a/python/ray/rllib/agents/agent.py +++ b/python/ray/rllib/agents/agent.py @@ -234,10 +234,10 @@ class Agent(Trainable): return Trainable.train(self) - def _setup(self): + def _setup(self, config): env = self._env_id if env: - self.config["env"] = env + config["env"] = env if _global_registry.contains(ENV_CREATOR, env): self.env_creator = _global_registry.get(ENV_CREATOR, env) else: @@ -248,7 +248,7 @@ class Agent(Trainable): # Merge the supplied config with the class default merged_config = self._default_config.copy() - merged_config = deep_update(merged_config, self.config, + merged_config = deep_update(merged_config, config, self._allow_unknown_configs, self._allow_unknown_subkeys) self.config = merged_config diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py index 2c368b4e3..e07f11b32 100644 --- a/python/ray/tune/examples/async_hyperband_example.py +++ b/python/ray/tune/examples/async_hyperband_example.py @@ -23,7 +23,7 @@ class MyTrainableClass(Trainable): maximum reward value reached. """ - def _setup(self): + def _setup(self, config): self.timestep = 0 def _train(self): diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index 94f603e82..baf133b41 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -23,7 +23,7 @@ class MyTrainableClass(Trainable): maximum reward value reached. """ - def _setup(self): + def _setup(self, config): self.timestep = 0 def _train(self): diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py index 0d23c0cc2..2c0c68bce 100644 --- a/python/ray/tune/examples/mnist_pytorch_trainable.py +++ b/python/ray/tune/examples/mnist_pytorch_trainable.py @@ -80,9 +80,9 @@ class Net(nn.Module): class TrainMNIST(Trainable): - def _setup(self): - args = self.config.pop("args") - vars(args).update(self.config) + def _setup(self, config): + args = config.pop("args") + vars(args).update(config) args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) diff --git a/python/ray/tune/examples/pbt_example.py b/python/ray/tune/examples/pbt_example.py index c958d2512..3433e82f9 100755 --- a/python/ray/tune/examples/pbt_example.py +++ b/python/ray/tune/examples/pbt_example.py @@ -18,7 +18,7 @@ from ray.tune.schedulers import PopulationBasedTraining class MyTrainableClass(Trainable): """Fake agent whose learning rate is determined by dummy factors.""" - def _setup(self): + def _setup(self, config): self.timestep = 0 self.current_value = 0.0 diff --git a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py index 28575f546..63e3d00e8 100755 --- a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py +++ b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py @@ -105,7 +105,7 @@ class Cifar10Model(Trainable): model = Model(inputs=x, outputs=y, name="model1") return model - def _setup(self): + def _setup(self, config): self.train_data, self.test_data = self._read_data() x_train = self.train_data[0] model = self._build_model(x_train.shape[1:]) diff --git a/python/ray/tune/examples/tune_mnist_ray_hyperband.py b/python/ray/tune/examples/tune_mnist_ray_hyperband.py index 29939ff24..9dbc46775 100755 --- a/python/ray/tune/examples/tune_mnist_ray_hyperband.py +++ b/python/ray/tune/examples/tune_mnist_ray_hyperband.py @@ -128,7 +128,7 @@ def bias_variable(shape): class TrainMNIST(Trainable): """Example MNIST trainable.""" - def _setup(self): + def _setup(self, config): global activation_fn self.timestep = 0 @@ -148,7 +148,7 @@ class TrainMNIST(Trainable): self.x = tf.placeholder(tf.float32, [None, 784]) self.y_ = tf.placeholder(tf.float32, [None, 10]) - activation_fn = getattr(tf.nn, self.config['activation']) + activation_fn = getattr(tf.nn, config['activation']) # Build the graph for the deep net y_conv, self.keep_prob = setupCNN(self.x) @@ -160,7 +160,7 @@ class TrainMNIST(Trainable): with tf.name_scope('adam_optimizer'): train_step = tf.train.AdamOptimizer( - self.config['learning_rate']).minimize(cross_entropy) + config['learning_rate']).minimize(cross_entropy) self.train_step = train_step diff --git a/python/ray/tune/function_runner.py b/python/ray/tune/function_runner.py index d1704b6aa..1b93d3b6c 100644 --- a/python/ray/tune/function_runner.py +++ b/python/ray/tune/function_runner.py @@ -90,10 +90,10 @@ class FunctionRunner(Trainable): _name = "func" _default_config = DEFAULT_CONFIG - def _setup(self): + def _setup(self, config): entrypoint = self._trainable_func() self._status_reporter = StatusReporter() - scrubbed_config = self.config.copy() + scrubbed_config = config.copy() for k in self._default_config: if k in scrubbed_config: del scrubbed_config[k] diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py index ec307eaed..5b7ade11f 100644 --- a/python/ray/tune/result.py +++ b/python/ray/tune/result.py @@ -39,7 +39,8 @@ TIME_TOTAL_S = "time_total_s" TRAINING_ITERATION = "training_iteration" # Where Tune writes result files by default -DEFAULT_RESULTS_DIR = os.path.expanduser("~/ray_results") +DEFAULT_RESULTS_DIR = (os.environ.get("TUNE_RESULT_DIR") + or os.path.expanduser("~/ray_results")) # Meta file about status under each experiment directory, can be # parsed by automlboard if exists. diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py index 7e2f8f27e..71c69b306 100644 --- a/python/ray/tune/schedulers/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -50,7 +50,10 @@ class HyperBandScheduler(FIFOScheduler): For example, to limit trials to 10 minutes and early stop based on the `episode_mean_reward` attr, construct: - ``HyperBand('time_total_s', 'episode_reward_mean', 600)`` + ``HyperBand('time_total_s', 'episode_reward_mean', max_t=600)`` + + Note that Tune's stopping criteria will be applied in conjunction with + HyperBand's early stopping mechanisms. See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index 45fe9753e..9173b56cc 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -4,7 +4,11 @@ from __future__ import print_function import numpy as np import copy +import logging + try: + hyperopt_logger = logging.getLogger("hyperopt") + hyperopt_logger.setLevel(logging.WARNING) import hyperopt as hpo except Exception as e: hpo = None @@ -47,7 +51,6 @@ class HyperOptSearch(SuggestionAlgorithm): >>> } >>> algo = HyperOptSearch( >>> space, max_concurrent=4, reward_attr="neg_mean_loss") - >>> algo.add_configurations(config) """ def __init__(self, diff --git a/python/ray/tune/test/trial_runner_test.py b/python/ray/tune/test/trial_runner_test.py index 1e4c0509d..65b8fbe36 100644 --- a/python/ray/tune/test/trial_runner_test.py +++ b/python/ray/tune/test/trial_runner_test.py @@ -433,6 +433,71 @@ class TrainableFunctionApiTest(unittest.TestCase): self.assertEqual(trial3.last_result[TIMESTEPS_TOTAL], 5) self.assertEqual(trial3.last_result["timesteps_this_iter"], 0) + def testCheckpointDict(self): + class TestTrain(Trainable): + def _setup(self, config): + self.state = {"hi": 1} + + def _train(self): + return dict(timesteps_this_iter=1, done=True) + + def _save(self, path): + return self.state + + def _restore(self, state): + self.state = state + + test_trainable = TestTrain() + result = test_trainable.save() + test_trainable.state["hi"] = 2 + test_trainable.restore(result) + self.assertEqual(test_trainable.state["hi"], 1) + + trials = run_experiments({ + "foo": { + "run": TestTrain, + "checkpoint_at_end": True + } + }) + for trial in trials: + self.assertEqual(trial.status, Trial.TERMINATED) + self.assertTrue(trial.has_checkpoint()) + + def testMultipleCheckpoints(self): + class TestTrain(Trainable): + def _setup(self, config): + self.state = {"hi": 1, "iter": 0} + + def _train(self): + self.state["iter"] += 1 + return dict(timesteps_this_iter=1, done=True) + + def _save(self, path): + return self.state + + def _restore(self, state): + self.state = state + + test_trainable = TestTrain() + checkpoint_1 = test_trainable.save() + test_trainable.train() + checkpoint_2 = test_trainable.save() + self.assertNotEqual(checkpoint_1, checkpoint_2) + test_trainable.restore(checkpoint_2) + self.assertEqual(test_trainable.state["iter"], 1) + test_trainable.restore(checkpoint_1) + self.assertEqual(test_trainable.state["iter"], 0) + + trials = run_experiments({ + "foo": { + "run": TestTrain, + "checkpoint_at_end": True + } + }) + for trial in trials: + self.assertEqual(trial.status, Trial.TERMINATED) + self.assertTrue(trial.has_checkpoint()) + class RunExperimentTest(unittest.TestCase): def setUp(self): diff --git a/python/ray/tune/trainable.py b/python/ray/tune/trainable.py index 1e537d26d..6c8b02cf0 100644 --- a/python/ray/tune/trainable.py +++ b/python/ray/tune/trainable.py @@ -4,6 +4,7 @@ from __future__ import print_function from datetime import datetime +import copy import gzip import io import logging @@ -83,7 +84,7 @@ class Trainable(object): self._timesteps_since_restore = 0 self._iterations_since_restore = 0 self._restored = False - self._setup() + self._setup(copy.deepcopy(self.config)) self._local_ip = ray.services.get_node_ip_address() @classmethod @@ -143,6 +144,8 @@ class Trainable(object): start = time.time() result = self._train() + assert isinstance(result, dict), "_train() needs to return a dict." + result = result.copy() self._iteration += 1 @@ -211,11 +214,27 @@ class Trainable(object): Checkpoint path that may be passed to restore(). """ - checkpoint_path = self._save(checkpoint_dir or self.logdir) - pickle.dump([ - self._experiment_id, self._iteration, self._timesteps_total, - self._time_total, self._episodes_total - ], open(checkpoint_path + ".tune_metadata", "wb")) + checkpoint_path = tempfile.mkdtemp( + prefix="checkpoint_{}".format(self._iteration), + dir=checkpoint_dir or self.logdir) + checkpoint = self._save(checkpoint_path) + saved_as_dict = False + if isinstance(checkpoint, str): + checkpoint_path = checkpoint + elif isinstance(checkpoint, dict): + saved_as_dict = True + pickle.dump(checkpoint, open(checkpoint_path + ".tune_state", + "wb")) + else: + raise ValueError("Return value from `_save` must be dict or str.") + pickle.dump({ + "experiment_id": self._experiment_id, + "iteration": self._iteration, + "timesteps_total": self._timesteps_total, + "time_total": self._time_total, + "episodes_total": self._episodes_total, + "saved_as_dict": saved_as_dict + }, open(checkpoint_path + ".tune_metadata", "wb")) return checkpoint_path def save_to_object(self): @@ -259,13 +278,19 @@ class Trainable(object): This method restores additional metadata saved with the checkpoint. """ - self._restore(checkpoint_path) metadata = pickle.load(open(checkpoint_path + ".tune_metadata", "rb")) - self._experiment_id = metadata[0] - self._iteration = metadata[1] - self._timesteps_total = metadata[2] - self._time_total = metadata[3] - self._episodes_total = metadata[4] + self._experiment_id = metadata["experiment_id"] + self._iteration = metadata["iteration"] + self._timesteps_total = metadata["timesteps_total"] + self._time_total = metadata["time_total"] + self._episodes_total = metadata["episodes_total"] + saved_as_dict = metadata["saved_as_dict"] + if saved_as_dict: + with open(checkpoint_path + ".tune_state", "rb") as loaded_state: + checkpoint_dict = pickle.load(loaded_state) + self._restore(checkpoint_dict) + else: + self._restore(checkpoint_path) self._restored = True def restore_from_object(self, obj): @@ -321,27 +346,34 @@ class Trainable(object): can be stored. Returns: - Checkpoint path that may be passed to restore(). Typically - would default to `checkpoint_dir`. + checkpoint (str | dict): If string, the return value is + expected to be the checkpoint path that will be passed to + `_restore()`. If dict, the return value will be automatically + serialized by Tune and passed to `_restore()`. + + Examples: + >>> checkpoint_data = trainable._save(checkpoint_dir) + >>> trainable2._restore(checkpoint_data) """ raise NotImplementedError - def _restore(self, checkpoint_path): + def _restore(self, checkpoint): """Subclasses should override this to implement restore(). Args: - checkpoint_path (str): The directory where the checkpoint - is stored. + checkpoint (str | dict): Value as returned by `_save`. + If a string, then it is the checkpoint path. """ raise NotImplementedError - def _setup(self): + def _setup(self, config): """Subclasses should override this for custom initialization. - Subclasses can access the hyperparameter configuration via - ``self.config``. + Args: + config (dict): Hyperparameters and other configs given. + Copy of `self.config`. """ pass diff --git a/python/ray/tune/trial.py b/python/ray/tune/trial.py index 98fcbc6d5..59559ebbe 100644 --- a/python/ray/tune/trial.py +++ b/python/ray/tune/trial.py @@ -8,6 +8,7 @@ import logging import time import tempfile import os +from numbers import Number import ray from ray.tune import TuneError @@ -33,12 +34,14 @@ class Resources( namedtuple("Resources", ["cpu", "gpu", "extra_cpu", "extra_gpu"])): """Ray resources required to schedule a trial. + TODO: Custom resources. + Attributes: - cpu (int): Number of CPUs to allocate to the trial. - gpu (int): Number of GPUs to allocate to the trial. - extra_cpu (int): Extra CPUs to reserve in case the trial needs to + cpu (float): Number of CPUs to allocate to the trial. + gpu (float): Number of GPUs to allocate to the trial. + extra_cpu (float): Extra CPUs to reserve in case the trial needs to launch additional Ray actors that use CPUs. - extra_gpu (int): Extra GPUs to reserve in case the trial needs to + extra_gpu (float): Extra GPUs to reserve in case the trial needs to launch additional Ray actors that use GPUs. """ @@ -46,6 +49,9 @@ class Resources( __slots__ = () def __new__(cls, cpu, gpu, extra_cpu=0, extra_gpu=0): + for entry in [cpu, gpu, extra_cpu, extra_gpu]: + assert isinstance(entry, Number), "Improper resource value." + assert entry >= 0, "Resource cannot be negative." return super(Resources, cls).__new__(cls, cpu, gpu, extra_cpu, extra_gpu)