mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 19:32:11 +08:00
[tune] Tweaks to Trainable and Verbosity (#2889)
This commit is contained in:
@@ -25,10 +25,13 @@ By default, Tune uses the `default search space and variant generation process <
|
||||
:noindex:
|
||||
|
||||
|
||||
Note that other search algorithms will not necessarily extend this class and may require a different search space declaration than the default Tune format.
|
||||
|
||||
HyperOpt Search (Tree-structured Parzen Estimators)
|
||||
---------------------------------------------------
|
||||
|
||||
The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization.
|
||||
The ``HyperOptSearch`` is a SearchAlgorithm that is backed by `HyperOpt <http://hyperopt.github.io/hyperopt>`__ to perform sequential model-based hyperparameter optimization. Note that this class does not extend ``ray.tune.suggest.BasicVariantGenerator``, so you will not be able to use Tune's default variant generation/search space declaration when using HyperOptSearch.
|
||||
|
||||
In order to use this search algorithm, you will need to install HyperOpt via the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
@@ -47,7 +50,6 @@ An example of this can be found in `hyperopt_example.py <https://github.com/ray-
|
||||
:show-inheritance:
|
||||
:noindex:
|
||||
|
||||
|
||||
Contributing a New Algorithm
|
||||
----------------------------
|
||||
|
||||
|
||||
@@ -134,6 +134,9 @@ Tune Search Space (Default)
|
||||
|
||||
You can use ``tune.grid_search`` to specify an axis of a grid search. By default, Tune also supports sampling parameters from user-specified lambda functions, which can be used independently or in combination with grid search.
|
||||
|
||||
.. note::
|
||||
If you specify an explicit Search Algorithm such as any SuggestionAlgorithm, you may not be able to specify lambdas or grid search with this interface, as the search algorithm may require a different search space declaration.
|
||||
|
||||
The following shows grid search over two nested parameters combined with random sampling from two lambda functions, generating 9 different trials. Note that the value of ``beta`` depends on the value of ``alpha``, which is represented by referencing ``spec.config.alpha`` in the lambda function. This lets you specify conditional parameter distributions.
|
||||
|
||||
.. code-block:: python
|
||||
@@ -157,9 +160,6 @@ The following shows grid search over two nested parameters combined with random
|
||||
.. note::
|
||||
Lambda functions will be evaluated during trial variant generation. If you need to pass a literal function in your config, use ``tune.function(...)`` to escape it.
|
||||
|
||||
.. warning::
|
||||
If you specify a Search Algorithm, you may not be able to use this feature, as the algorithm may require a different search space declaration.
|
||||
|
||||
For more information on variant generation, see `basic_variant.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/suggest/basic_variant.py>`__.
|
||||
|
||||
Sampling Multiple Times
|
||||
|
||||
@@ -6,5 +6,6 @@ FROM ray-project/deploy
|
||||
RUN conda install -y numpy
|
||||
RUN apt-get install -y zlib1g-dev
|
||||
RUN pip install gym[atari] opencv-python==3.2.0.8 tensorflow lz4 keras
|
||||
RUN pip install -U h5py # Mutes FutureWarnings
|
||||
RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
|
||||
RUN conda install pytorch-cpu torchvision-cpu -c pytorch
|
||||
|
||||
@@ -234,10 +234,10 @@ class Agent(Trainable):
|
||||
|
||||
return Trainable.train(self)
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
env = self._env_id
|
||||
if env:
|
||||
self.config["env"] = env
|
||||
config["env"] = env
|
||||
if _global_registry.contains(ENV_CREATOR, env):
|
||||
self.env_creator = _global_registry.get(ENV_CREATOR, env)
|
||||
else:
|
||||
@@ -248,7 +248,7 @@ class Agent(Trainable):
|
||||
|
||||
# Merge the supplied config with the class default
|
||||
merged_config = self._default_config.copy()
|
||||
merged_config = deep_update(merged_config, self.config,
|
||||
merged_config = deep_update(merged_config, config,
|
||||
self._allow_unknown_configs,
|
||||
self._allow_unknown_subkeys)
|
||||
self.config = merged_config
|
||||
|
||||
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
|
||||
maximum reward value reached.
|
||||
"""
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
self.timestep = 0
|
||||
|
||||
def _train(self):
|
||||
|
||||
@@ -23,7 +23,7 @@ class MyTrainableClass(Trainable):
|
||||
maximum reward value reached.
|
||||
"""
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
self.timestep = 0
|
||||
|
||||
def _train(self):
|
||||
|
||||
@@ -80,9 +80,9 @@ class Net(nn.Module):
|
||||
|
||||
|
||||
class TrainMNIST(Trainable):
|
||||
def _setup(self):
|
||||
args = self.config.pop("args")
|
||||
vars(args).update(self.config)
|
||||
def _setup(self, config):
|
||||
args = config.pop("args")
|
||||
vars(args).update(config)
|
||||
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
|
||||
torch.manual_seed(args.seed)
|
||||
|
||||
@@ -18,7 +18,7 @@ from ray.tune.schedulers import PopulationBasedTraining
|
||||
class MyTrainableClass(Trainable):
|
||||
"""Fake agent whose learning rate is determined by dummy factors."""
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
self.timestep = 0
|
||||
self.current_value = 0.0
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ class Cifar10Model(Trainable):
|
||||
model = Model(inputs=x, outputs=y, name="model1")
|
||||
return model
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
self.train_data, self.test_data = self._read_data()
|
||||
x_train = self.train_data[0]
|
||||
model = self._build_model(x_train.shape[1:])
|
||||
|
||||
@@ -128,7 +128,7 @@ def bias_variable(shape):
|
||||
class TrainMNIST(Trainable):
|
||||
"""Example MNIST trainable."""
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
global activation_fn
|
||||
|
||||
self.timestep = 0
|
||||
@@ -148,7 +148,7 @@ class TrainMNIST(Trainable):
|
||||
self.x = tf.placeholder(tf.float32, [None, 784])
|
||||
self.y_ = tf.placeholder(tf.float32, [None, 10])
|
||||
|
||||
activation_fn = getattr(tf.nn, self.config['activation'])
|
||||
activation_fn = getattr(tf.nn, config['activation'])
|
||||
|
||||
# Build the graph for the deep net
|
||||
y_conv, self.keep_prob = setupCNN(self.x)
|
||||
@@ -160,7 +160,7 @@ class TrainMNIST(Trainable):
|
||||
|
||||
with tf.name_scope('adam_optimizer'):
|
||||
train_step = tf.train.AdamOptimizer(
|
||||
self.config['learning_rate']).minimize(cross_entropy)
|
||||
config['learning_rate']).minimize(cross_entropy)
|
||||
|
||||
self.train_step = train_step
|
||||
|
||||
|
||||
@@ -90,10 +90,10 @@ class FunctionRunner(Trainable):
|
||||
_name = "func"
|
||||
_default_config = DEFAULT_CONFIG
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
entrypoint = self._trainable_func()
|
||||
self._status_reporter = StatusReporter()
|
||||
scrubbed_config = self.config.copy()
|
||||
scrubbed_config = config.copy()
|
||||
for k in self._default_config:
|
||||
if k in scrubbed_config:
|
||||
del scrubbed_config[k]
|
||||
|
||||
@@ -39,7 +39,8 @@ TIME_TOTAL_S = "time_total_s"
|
||||
TRAINING_ITERATION = "training_iteration"
|
||||
|
||||
# Where Tune writes result files by default
|
||||
DEFAULT_RESULTS_DIR = os.path.expanduser("~/ray_results")
|
||||
DEFAULT_RESULTS_DIR = (os.environ.get("TUNE_RESULT_DIR")
|
||||
or os.path.expanduser("~/ray_results"))
|
||||
|
||||
# Meta file about status under each experiment directory, can be
|
||||
# parsed by automlboard if exists.
|
||||
|
||||
@@ -50,7 +50,10 @@ class HyperBandScheduler(FIFOScheduler):
|
||||
For example, to limit trials to 10 minutes and early stop based on the
|
||||
`episode_reward_mean` attr, construct:
|
||||
|
||||
``HyperBand('time_total_s', 'episode_reward_mean', 600)``
|
||||
``HyperBandScheduler('time_total_s', 'episode_reward_mean', max_t=600)``
|
||||
|
||||
Note that Tune's stopping criteria will be applied in conjunction with
|
||||
HyperBand's early stopping mechanisms.
|
||||
|
||||
See also: https://people.eecs.berkeley.edu/~kjamieson/hyperband.html
|
||||
|
||||
|
||||
@@ -4,7 +4,11 @@ from __future__ import print_function
|
||||
|
||||
import numpy as np
|
||||
import copy
|
||||
import logging
|
||||
|
||||
try:
|
||||
hyperopt_logger = logging.getLogger("hyperopt")
|
||||
hyperopt_logger.setLevel(logging.WARNING)
|
||||
import hyperopt as hpo
|
||||
except Exception as e:
|
||||
hpo = None
|
||||
@@ -47,7 +51,6 @@ class HyperOptSearch(SuggestionAlgorithm):
|
||||
>>> }
|
||||
>>> algo = HyperOptSearch(
|
||||
>>> space, max_concurrent=4, reward_attr="neg_mean_loss")
|
||||
>>> algo.add_configurations(config)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
|
||||
@@ -433,6 +433,71 @@ class TrainableFunctionApiTest(unittest.TestCase):
|
||||
self.assertEqual(trial3.last_result[TIMESTEPS_TOTAL], 5)
|
||||
self.assertEqual(trial3.last_result["timesteps_this_iter"], 0)
|
||||
|
||||
def testCheckpointDict(self):
|
||||
class TestTrain(Trainable):
|
||||
def _setup(self, config):
|
||||
self.state = {"hi": 1}
|
||||
|
||||
def _train(self):
|
||||
return dict(timesteps_this_iter=1, done=True)
|
||||
|
||||
def _save(self, path):
|
||||
return self.state
|
||||
|
||||
def _restore(self, state):
|
||||
self.state = state
|
||||
|
||||
test_trainable = TestTrain()
|
||||
result = test_trainable.save()
|
||||
test_trainable.state["hi"] = 2
|
||||
test_trainable.restore(result)
|
||||
self.assertEqual(test_trainable.state["hi"], 1)
|
||||
|
||||
trials = run_experiments({
|
||||
"foo": {
|
||||
"run": TestTrain,
|
||||
"checkpoint_at_end": True
|
||||
}
|
||||
})
|
||||
for trial in trials:
|
||||
self.assertEqual(trial.status, Trial.TERMINATED)
|
||||
self.assertTrue(trial.has_checkpoint())
|
||||
|
||||
def testMultipleCheckpoints(self):
|
||||
class TestTrain(Trainable):
|
||||
def _setup(self, config):
|
||||
self.state = {"hi": 1, "iter": 0}
|
||||
|
||||
def _train(self):
|
||||
self.state["iter"] += 1
|
||||
return dict(timesteps_this_iter=1, done=True)
|
||||
|
||||
def _save(self, path):
|
||||
return self.state
|
||||
|
||||
def _restore(self, state):
|
||||
self.state = state
|
||||
|
||||
test_trainable = TestTrain()
|
||||
checkpoint_1 = test_trainable.save()
|
||||
test_trainable.train()
|
||||
checkpoint_2 = test_trainable.save()
|
||||
self.assertNotEqual(checkpoint_1, checkpoint_2)
|
||||
test_trainable.restore(checkpoint_2)
|
||||
self.assertEqual(test_trainable.state["iter"], 1)
|
||||
test_trainable.restore(checkpoint_1)
|
||||
self.assertEqual(test_trainable.state["iter"], 0)
|
||||
|
||||
trials = run_experiments({
|
||||
"foo": {
|
||||
"run": TestTrain,
|
||||
"checkpoint_at_end": True
|
||||
}
|
||||
})
|
||||
for trial in trials:
|
||||
self.assertEqual(trial.status, Trial.TERMINATED)
|
||||
self.assertTrue(trial.has_checkpoint())
|
||||
|
||||
|
||||
class RunExperimentTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
|
||||
@@ -4,6 +4,7 @@ from __future__ import print_function
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
import copy
|
||||
import gzip
|
||||
import io
|
||||
import logging
|
||||
@@ -83,7 +84,7 @@ class Trainable(object):
|
||||
self._timesteps_since_restore = 0
|
||||
self._iterations_since_restore = 0
|
||||
self._restored = False
|
||||
self._setup()
|
||||
self._setup(copy.deepcopy(self.config))
|
||||
self._local_ip = ray.services.get_node_ip_address()
|
||||
|
||||
@classmethod
|
||||
@@ -143,6 +144,8 @@ class Trainable(object):
|
||||
|
||||
start = time.time()
|
||||
result = self._train()
|
||||
assert isinstance(result, dict), "_train() needs to return a dict."
|
||||
|
||||
result = result.copy()
|
||||
|
||||
self._iteration += 1
|
||||
@@ -211,11 +214,27 @@ class Trainable(object):
|
||||
Checkpoint path that may be passed to restore().
|
||||
"""
|
||||
|
||||
checkpoint_path = self._save(checkpoint_dir or self.logdir)
|
||||
pickle.dump([
|
||||
self._experiment_id, self._iteration, self._timesteps_total,
|
||||
self._time_total, self._episodes_total
|
||||
], open(checkpoint_path + ".tune_metadata", "wb"))
|
||||
checkpoint_path = tempfile.mkdtemp(
|
||||
prefix="checkpoint_{}".format(self._iteration),
|
||||
dir=checkpoint_dir or self.logdir)
|
||||
checkpoint = self._save(checkpoint_path)
|
||||
saved_as_dict = False
|
||||
if isinstance(checkpoint, str):
|
||||
checkpoint_path = checkpoint
|
||||
elif isinstance(checkpoint, dict):
|
||||
saved_as_dict = True
|
||||
pickle.dump(checkpoint, open(checkpoint_path + ".tune_state",
|
||||
"wb"))
|
||||
else:
|
||||
raise ValueError("Return value from `_save` must be dict or str.")
|
||||
pickle.dump({
|
||||
"experiment_id": self._experiment_id,
|
||||
"iteration": self._iteration,
|
||||
"timesteps_total": self._timesteps_total,
|
||||
"time_total": self._time_total,
|
||||
"episodes_total": self._episodes_total,
|
||||
"saved_as_dict": saved_as_dict
|
||||
}, open(checkpoint_path + ".tune_metadata", "wb"))
|
||||
return checkpoint_path
|
||||
|
||||
def save_to_object(self):
|
||||
@@ -259,13 +278,19 @@ class Trainable(object):
|
||||
This method restores additional metadata saved with the checkpoint.
|
||||
"""
|
||||
|
||||
self._restore(checkpoint_path)
|
||||
metadata = pickle.load(open(checkpoint_path + ".tune_metadata", "rb"))
|
||||
self._experiment_id = metadata[0]
|
||||
self._iteration = metadata[1]
|
||||
self._timesteps_total = metadata[2]
|
||||
self._time_total = metadata[3]
|
||||
self._episodes_total = metadata[4]
|
||||
self._experiment_id = metadata["experiment_id"]
|
||||
self._iteration = metadata["iteration"]
|
||||
self._timesteps_total = metadata["timesteps_total"]
|
||||
self._time_total = metadata["time_total"]
|
||||
self._episodes_total = metadata["episodes_total"]
|
||||
saved_as_dict = metadata["saved_as_dict"]
|
||||
if saved_as_dict:
|
||||
with open(checkpoint_path + ".tune_state", "rb") as loaded_state:
|
||||
checkpoint_dict = pickle.load(loaded_state)
|
||||
self._restore(checkpoint_dict)
|
||||
else:
|
||||
self._restore(checkpoint_path)
|
||||
self._restored = True
|
||||
|
||||
def restore_from_object(self, obj):
|
||||
@@ -321,27 +346,34 @@ class Trainable(object):
|
||||
can be stored.
|
||||
|
||||
Returns:
|
||||
Checkpoint path that may be passed to restore(). Typically
|
||||
would default to `checkpoint_dir`.
|
||||
checkpoint (str | dict): If string, the return value is
|
||||
expected to be the checkpoint path that will be passed to
|
||||
`_restore()`. If dict, the return value will be automatically
|
||||
serialized by Tune and passed to `_restore()`.
|
||||
|
||||
Examples:
|
||||
>>> checkpoint_data = trainable._save(checkpoint_dir)
|
||||
>>> trainable2._restore(checkpoint_data)
|
||||
"""
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def _restore(self, checkpoint_path):
|
||||
def _restore(self, checkpoint):
|
||||
"""Subclasses should override this to implement restore().
|
||||
|
||||
Args:
|
||||
checkpoint_path (str): The directory where the checkpoint
|
||||
is stored.
|
||||
checkpoint (str | dict): Value as returned by `_save`.
|
||||
If a string, then it is the checkpoint path.
|
||||
"""
|
||||
|
||||
raise NotImplementedError
|
||||
|
||||
def _setup(self):
|
||||
def _setup(self, config):
|
||||
"""Subclasses should override this for custom initialization.
|
||||
|
||||
Subclasses can access the hyperparameter configuration via
|
||||
``self.config``.
|
||||
Args:
|
||||
config (dict): Hyperparameters and other configs given.
|
||||
This is a deep copy of `self.config`, so mutating it does not change `self.config`.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ import logging
|
||||
import time
|
||||
import tempfile
|
||||
import os
|
||||
from numbers import Number
|
||||
|
||||
import ray
|
||||
from ray.tune import TuneError
|
||||
@@ -33,12 +34,14 @@ class Resources(
|
||||
namedtuple("Resources", ["cpu", "gpu", "extra_cpu", "extra_gpu"])):
|
||||
"""Ray resources required to schedule a trial.
|
||||
|
||||
TODO: Custom resources.
|
||||
|
||||
Attributes:
|
||||
cpu (int): Number of CPUs to allocate to the trial.
|
||||
gpu (int): Number of GPUs to allocate to the trial.
|
||||
extra_cpu (int): Extra CPUs to reserve in case the trial needs to
|
||||
cpu (float): Number of CPUs to allocate to the trial.
|
||||
gpu (float): Number of GPUs to allocate to the trial.
|
||||
extra_cpu (float): Extra CPUs to reserve in case the trial needs to
|
||||
launch additional Ray actors that use CPUs.
|
||||
extra_gpu (int): Extra GPUs to reserve in case the trial needs to
|
||||
extra_gpu (float): Extra GPUs to reserve in case the trial needs to
|
||||
launch additional Ray actors that use GPUs.
|
||||
|
||||
"""
|
||||
@@ -46,6 +49,9 @@ class Resources(
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, cpu, gpu, extra_cpu=0, extra_gpu=0):
|
||||
for entry in [cpu, gpu, extra_cpu, extra_gpu]:
|
||||
assert isinstance(entry, Number), "Improper resource value."
|
||||
assert entry >= 0, "Resource cannot be negative."
|
||||
return super(Resources, cls).__new__(cls, cpu, gpu, extra_cpu,
|
||||
extra_gpu)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user