[tune] Tweaks to Trainable and Verbosity (#2889)

This commit is contained in:
Richard Liaw
2018-10-11 23:42:13 -07:00
committed by GitHub
parent 828fe24b39
commit f9b58d7b02
17 changed files with 160 additions and 47 deletions
+4 -2
View File
@@ -25,10 +25,13 @@ By default, Tune uses the `default search space and variant generation process <
:noindex:
Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format.
HyperOpt Search (Tree-structured Parzen Estimators)
---------------------------------------------------
The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization.
The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using HyperOptSearch.
In order to use this search algorithm, you will need to install HyperOpt via the following command:
.. code-block:: bash
@@ -47,7 +50,6 @@ An example of this can be found in `hyperopt_example.py <https://github.com/ray-
:show-inheritance:
:noindex:
Contributing a New Algorithm
----------------------------
+3 -3
View File
@@ -134,6 +134,9 @@ Tune Search Space (Default)
You can use ``tune.grid_search`` to specify an axis of a grid search. By default, Tune also supports sampling parameters from user-specified lambda functions, which can be used independently or in combination with grid search.
.. note::
If you specify an explicit Search Algorithm (for example, any SuggestionAlgorithm), you may not be able to specify lambdas or grid search with this interface, because the search algorithm may require a different search space declaration.
The following shows grid search over two nested parameters combined with random sampling from two lambda functions, generating 9 different trials. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions.
.. code-block:: python
@@ -157,9 +160,6 @@ The following shows grid search over two nested parameters combined with random
.. note::
Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it.
.. warning::
If you specify a Search Algorithm, you may not be able to use this feature, as the algorithm may require a different search space declaration.
For more information on variant generation, see `basic_variant.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/suggest/basic_variant.py>`__.
Sampling Multiple Times
+1
View File
@@ -6,5 +6,6 @@ FROM ray-project/deploy
RUN conda install -y numpy
RUN apt-get install -y zlib1g-dev
RUN pip install gym[atari] opencv-python==3.2.0.8 tensorflow lz4 keras
RUN pip install -U h5py # Mutes FutureWarnings
RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
RUN conda install pytorch-cpu torchvision-cpu -c pytorch
+3 -3
View File
@@ -234,10 +234,10 @@ class Agent(Trainable):
return Trainable.train(self)
def _setup(self):
def _setup(self, config):
env = self._env_id
if env:
self.config["env"] = env
config["env"] = env
if _global_registry.contains(ENV_CREATOR, env):
self.env_creator = _global_registry.get(ENV_CREATOR, env)
else:
@@ -248,7 +248,7 @@ class Agent(Trainable):
# Merge the supplied config with the class default
merged_config = self._default_config.copy()
merged_config = deep_update(merged_config, self.config,
merged_config = deep_update(merged_config, config,
self._allow_unknown_configs,
self._allow_unknown_subkeys)
self.config = merged_config
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
maximum reward value reached.
"""
def _setup(self):
def _setup(self, config):
self.timestep = 0
def _train(self):
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
maximum reward value reached.
"""
def _setup(self):
def _setup(self, config):
self.timestep = 0
def _train(self):
@@ -80,9 +80,9 @@ class Net(nn.Module):
class TrainMNIST(Trainable):
def _setup(self):
args = self.config.pop("args")
vars(args).update(self.config)
def _setup(self, config):
args = config.pop("args")
vars(args).update(config)
args.cuda = not args.no_cuda and torch.cuda.is_available()
torch.manual_seed(args.seed)
+1 -1
View File
@@ -18,7 +18,7 @@ from ray.tune.schedulers import PopulationBasedTraining
class MyTrainableClass(Trainable):
"""Fake agent whose learning rate is determined by dummy factors."""
def _setup(self):
def _setup(self, config):
self.timestep = 0
self.current_value = 0.0
@@ -105,7 +105,7 @@ class Cifar10Model(Trainable):
model = Model(inputs=x, outputs=y, name="model1")
return model
def _setup(self):
def _setup(self, config):
self.train_data, self.test_data = self._read_data()
x_train = self.train_data[0]
model = self._build_model(x_train.shape[1:])
@@ -128,7 +128,7 @@ def bias_variable(shape):
class TrainMNIST(Trainable):
"""Example MNIST trainable."""
def _setup(self):
def _setup(self, config):
global activation_fn
self.timestep = 0
@@ -148,7 +148,7 @@ class TrainMNIST(Trainable):
self.x = tf.placeholder(tf.float32, [None, 784])
self.y_ = tf.placeholder(tf.float32, [None, 10])
activation_fn = getattr(tf.nn, self.config['activation'])
activation_fn = getattr(tf.nn, config['activation'])
# Build the graph for the deep net
y_conv, self.keep_prob = setupCNN(self.x)
@@ -160,7 +160,7 @@ class TrainMNIST(Trainable):
with tf.name_scope('adam_optimizer'):
train_step = tf.train.AdamOptimizer(
self.config['learning_rate']).minimize(cross_entropy)
config['learning_rate']).minimize(cross_entropy)
self.train_step = train_step
+2 -2
View File
@@ -90,10 +90,10 @@ class FunctionRunner(Trainable):
_name = "func"
_default_config = DEFAULT_CONFIG
def _setup(self):
def _setup(self, config):
entrypoint = self._trainable_func()
self._status_reporter = StatusReporter()
scrubbed_config = self.config.copy()
scrubbed_config = config.copy()
for k in self._default_config:
if k in scrubbed_config:
del scrubbed_config[k]
+2 -1
View File
@@ -39,7 +39,8 @@ TIME_TOTAL_S = "time_total_s"
TRAINING_ITERATION = "training_iteration"
# Where Tune writes result files by default
DEFAULT_RESULTS_DIR = os.path.expanduser("~/ray_results")
DEFAULT_RESULTS_DIR = (os.environ.get("TUNE_RESULT_DIR")
or os.path.expanduser("~/ray_results"))
# Meta file about status under each experiment directory, can be
# parsed by automlboard if exists.
+4 -1
View File
@@ -50,7 +50,10 @@ class HyperBandScheduler(FIFOScheduler):
For example, to limit trials to 10 minutes and early stop based on the
`episode_reward_mean` attr, construct:
``HyperBand('time_total_s', 'episode_reward_mean', 600)``
``HyperBand('time_total_s', 'episode_reward_mean', max_t=600)``
Note that Tune's stopping criteria will be applied in conjunction with
HyperBand's early stopping mechanisms.
See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html
+4 -1
View File
@@ -4,7 +4,11 @@ from __future__ import print_function
import numpy as np
import copy
import logging
try:
hyperopt_logger = logging.getLogger("hyperopt")
hyperopt_logger.setLevel(logging.WARNING)
import hyperopt as hpo
except Exception as e:
hpo = None
@@ -47,7 +51,6 @@ class HyperOptSearch(SuggestionAlgorithm):
>>> }
>>> algo = HyperOptSearch(
>>> space, max_concurrent=4, reward_attr="neg_mean_loss")
>>> algo.add_configurations(config)
"""
def __init__(self,
+65
View File
@@ -433,6 +433,71 @@ class TrainableFunctionApiTest(unittest.TestCase):
self.assertEqual(trial3.last_result[TIMESTEPS_TOTAL], 5)
self.assertEqual(trial3.last_result["timesteps_this_iter"], 0)
def testCheckpointDict(self):
# Checks that a dict returned from `_save` is handed back to `_restore`
# after a save()/restore() round trip, and that an experiment run with
# `checkpoint_at_end` finishes with a checkpoint recorded on each trial.
class TestTrain(Trainable):
def _setup(self, config):
self.state = {"hi": 1}
def _train(self):
return dict(timesteps_this_iter=1, done=True)
def _save(self, path):
# Returns the state dict itself instead of a checkpoint path string.
return self.state
def _restore(self, state):
self.state = state
test_trainable = TestTrain()
result = test_trainable.save()
# Change the live state after saving so that the restore below is
# observable: the value must go back to what was saved (1).
test_trainable.state["hi"] = 2
test_trainable.restore(result)
self.assertEqual(test_trainable.state["hi"], 1)
# Run the same trainable through run_experiments with a checkpoint
# requested at the end of the trial.
trials = run_experiments({
"foo": {
"run": TestTrain,
"checkpoint_at_end": True
}
})
for trial in trials:
self.assertEqual(trial.status, Trial.TERMINATED)
self.assertTrue(trial.has_checkpoint())
def testMultipleCheckpoints(self):
# Checks that two checkpoints taken at different iterations are distinct
# and that each one restores the state that was current when it was saved.
class TestTrain(Trainable):
def _setup(self, config):
self.state = {"hi": 1, "iter": 0}
def _train(self):
# Each training step bumps the counter that the assertions inspect.
self.state["iter"] += 1
return dict(timesteps_this_iter=1, done=True)
def _save(self, path):
return self.state
def _restore(self, state):
self.state = state
test_trainable = TestTrain()
# checkpoint_1 is taken before any train() call ("iter" == 0),
# checkpoint_2 after exactly one train() call ("iter" == 1).
checkpoint_1 = test_trainable.save()
test_trainable.train()
checkpoint_2 = test_trainable.save()
self.assertNotEqual(checkpoint_1, checkpoint_2)
# Restore in reverse order to show that the earlier checkpoint was not
# overwritten by the later one.
test_trainable.restore(checkpoint_2)
self.assertEqual(test_trainable.state["iter"], 1)
test_trainable.restore(checkpoint_1)
self.assertEqual(test_trainable.state["iter"], 0)
# Run the same trainable through run_experiments with a checkpoint
# requested at the end of the trial.
trials = run_experiments({
"foo": {
"run": TestTrain,
"checkpoint_at_end": True
}
})
for trial in trials:
self.assertEqual(trial.status, Trial.TERMINATED)
self.assertTrue(trial.has_checkpoint())
class RunExperimentTest(unittest.TestCase):
def setUp(self):
+52 -20
View File
@@ -4,6 +4,7 @@ from __future__ import print_function
from datetime import datetime
import copy
import gzip
import io
import logging
@@ -83,7 +84,7 @@ class Trainable(object):
self._timesteps_since_restore = 0
self._iterations_since_restore = 0
self._restored = False
self._setup()
self._setup(copy.deepcopy(self.config))
self._local_ip = ray.services.get_node_ip_address()
@classmethod
@@ -143,6 +144,8 @@ class Trainable(object):
start = time.time()
result = self._train()
assert isinstance(result, dict), "_train() needs to return a dict."
result = result.copy()
self._iteration += 1
@@ -211,11 +214,27 @@ class Trainable(object):
Checkpoint path that may be passed to restore().
"""
checkpoint_path = self._save(checkpoint_dir or self.logdir)
pickle.dump([
self._experiment_id, self._iteration, self._timesteps_total,
self._time_total, self._episodes_total
], open(checkpoint_path + ".tune_metadata", "wb"))
checkpoint_path = tempfile.mkdtemp(
prefix="checkpoint_{}".format(self._iteration),
dir=checkpoint_dir or self.logdir)
checkpoint = self._save(checkpoint_path)
saved_as_dict = False
if isinstance(checkpoint, str):
checkpoint_path = checkpoint
elif isinstance(checkpoint, dict):
saved_as_dict = True
pickle.dump(checkpoint, open(checkpoint_path + ".tune_state",
"wb"))
else:
raise ValueError("Return value from `_save` must be dict or str.")
pickle.dump({
"experiment_id": self._experiment_id,
"iteration": self._iteration,
"timesteps_total": self._timesteps_total,
"time_total": self._time_total,
"episodes_total": self._episodes_total,
"saved_as_dict": saved_as_dict
}, open(checkpoint_path + ".tune_metadata", "wb"))
return checkpoint_path
def save_to_object(self):
@@ -259,13 +278,19 @@ class Trainable(object):
This method restores additional metadata saved with the checkpoint.
"""
self._restore(checkpoint_path)
metadata = pickle.load(open(checkpoint_path + ".tune_metadata", "rb"))
self._experiment_id = metadata[0]
self._iteration = metadata[1]
self._timesteps_total = metadata[2]
self._time_total = metadata[3]
self._episodes_total = metadata[4]
self._experiment_id = metadata["experiment_id"]
self._iteration = metadata["iteration"]
self._timesteps_total = metadata["timesteps_total"]
self._time_total = metadata["time_total"]
self._episodes_total = metadata["episodes_total"]
saved_as_dict = metadata["saved_as_dict"]
if saved_as_dict:
with open(checkpoint_path + ".tune_state", "rb") as loaded_state:
checkpoint_dict = pickle.load(loaded_state)
self._restore(checkpoint_dict)
else:
self._restore(checkpoint_path)
self._restored = True
def restore_from_object(self, obj):
@@ -321,27 +346,34 @@ class Trainable(object):
can be stored.
Returns:
Checkpoint path that may be passed to restore(). Typically
would default to `checkpoint_dir`.
checkpoint (str | dict): If string, the return value is
expected to be the checkpoint path that will be passed to
`_restore()`. If dict, the return value will be automatically
serialized by Tune and passed to `_restore()`.
Examples:
>>> checkpoint_data = trainable._save(checkpoint_dir)
>>> trainable2._restore(checkpoint_data)
"""
raise NotImplementedError
def _restore(self, checkpoint_path):
def _restore(self, checkpoint):
"""Subclasses should override this to implement restore().
Args:
checkpoint_path (str): The directory where the checkpoint
is stored.
checkpoint (str | dict): Value as returned by `_save`.
If a string, then it is the checkpoint path.
"""
raise NotImplementedError
def _setup(self):
def _setup(self, config):
"""Subclasses should override this for custom initialization.
Subclasses can access the hyperparameter configuration via
``self.config``.
Args:
config (dict): Hyperparameters and other configs given.
Copy of `self.config`.
"""
pass
+10 -4
View File
@@ -8,6 +8,7 @@ import logging
import time
import tempfile
import os
from numbers import Number
import ray
from ray.tune import TuneError
@@ -33,12 +34,14 @@ class Resources(
namedtuple("Resources", ["cpu", "gpu", "extra_cpu", "extra_gpu"])):
"""Ray resources required to schedule a trial.
TODO: Custom resources.
Attributes:
cpu (int): Number of CPUs to allocate to the trial.
gpu (int): Number of GPUs to allocate to the trial.
extra_cpu (int): Extra CPUs to reserve in case the trial needs to
cpu (float): Number of CPUs to allocate to the trial.
gpu (float): Number of GPUs to allocate to the trial.
extra_cpu (float): Extra CPUs to reserve in case the trial needs to
launch additional Ray actors that use CPUs.
extra_gpu (int): Extra GPUs to reserve in case the trial needs to
extra_gpu (float): Extra GPUs to reserve in case the trial needs to
launch additional Ray actors that use GPUs.
"""
@@ -46,6 +49,9 @@ class Resources(
__slots__ = ()
def __new__(cls, cpu, gpu, extra_cpu=0, extra_gpu=0):
# Validate every resource quantity before building the tuple: each must be
# a numbers.Number and must not be negative.
# NOTE(review): these checks use `assert`, so they are skipped entirely
# when Python runs with -O; invalid values would then pass through.
for entry in [cpu, gpu, extra_cpu, extra_gpu]:
assert isinstance(entry, Number), "Improper resource value."
assert entry >= 0, "Resource cannot be negative."
return super(Resources, cls).__new__(cls, cpu, gpu, extra_cpu,
extra_gpu)