[tune] ASHA xgboost and lightgbm examples (#5500)

This commit is contained in:
Richard Liaw
2019-08-22 10:37:59 -07:00
committed by GitHub
parent e2e30ca507
commit cdc9227f1b
13 changed files with 219 additions and 66 deletions
+2 -2
View File
@@ -10,11 +10,11 @@ from ray.tune.registry import register_env, register_trainable
from ray.tune.trainable import Trainable
from ray.tune.suggest import grid_search
from ray.tune.sample import (function, sample_from, uniform, choice, randint,
randn)
randn, loguniform)
__all__ = [
"Trainable", "TuneError", "grid_search", "register_env",
"register_trainable", "run", "run_experiments", "Experiment", "function",
"sample_from", "track", "uniform", "choice", "randint", "randn",
"ExperimentAnalysis", "Analysis"
"loguniform", "ExperimentAnalysis", "Analysis"
]
+33 -31
View File
@@ -11,54 +11,56 @@ If any example is broken, or if you'd like to add an example to this page, feel
General Examples
----------------
- `async_hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/async_hyperband_example.py>`__:
Example of using a Trainable class with AsyncHyperBandScheduler.
- `hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__:
Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration.
- `hyperopt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperopt_example.py>`__:
Optimizes a basic function using the function-based API and the HyperOptSearch (SearchAlgorithm wrapper for HyperOpt TPE).
Also uses the AsyncHyperBandScheduler.
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__:
Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__:
Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__:
Example of custom loggers and custom trial directory naming.
- `async_hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/async_hyperband_example.py>`__: Example of using a Trainable class with AsyncHyperBandScheduler.
- `hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__: Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration.
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
Search Algorithm Examples
-------------------------
- `Ax example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/ax_example.py>`__: Optimize a Hartmann function with `Ax <https://ax.dev>`_ with 4 parallel workers.
- `HyperOpt Example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperopt_example.py>`__: Optimizes a basic function using the function-based API and the HyperOptSearch (SearchAlgorithm wrapper for HyperOpt TPE). Also uses the AsyncHyperBandScheduler.
- `Nevergrad example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/nevergrad_example.py>`__: Optimize a simple toy function with the gradient-free optimization package `Nevergrad <https://github.com/facebookresearch/nevergrad>`_ with 4 parallel workers.
- `Bayesian Optimization example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/bayesopt_example.py>`__: Optimize a simple toy function using `Bayesian Optimization <https://github.com/fmfn/BayesianOptimization>`_ with 4 parallel workers.
Keras Examples
--------------
- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__:
Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
- `tune_mnist_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_keras.py>`__: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune.
PyTorch Examples
----------------
- `mnist_pytorch <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch.py>`__:
Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune.
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__:
Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.
- `mnist_pytorch <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch.py>`__: Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune.
- `mnist_pytorch_trainable <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mnist_pytorch_trainable.py>`__: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.
TensorFlow Examples
-------------------
- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__:
A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__:
A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__:
Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.
- `tune_mnist_ray <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class.
- `tune_mnist_ray_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_ray_hyperband.py>`__: A basic example of tuning a TensorFlow model on MNIST using the Trainable class and the HyperBand scheduler.
- `tune_mnist_async_hyperband <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_mnist_async_hyperband.py>`__: Example of tuning a TensorFlow model on MNIST using AsyncHyperBand.
XGBoost Example
---------------
- `xgboost_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/xgboost_example.py>`__: Trains a basic XGBoost model with Tune with the function-based API and an XGBoost callback.
LightGBM Example
----------------
- `lightgbm_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/lightgbm_example.py>`__: Trains a basic LightGBM model with Tune with the function-based API and a LightGBM callback.
Contributed Examples
--------------------
- `pbt_tune_cifar10_with_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py>`__:
A contributed example of tuning a Keras model on CIFAR10 with the PopulationBasedTraining scheduler.
- `genetic_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/genetic_example.py>`__:
Optimizing the michalewicz function using the contributed GeneticSearch search algorithm with AsyncHyperBandScheduler.
- `tune_cifar10_gluon <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_cifar10_gluon.py>`__:
MXNet Gluon example to use Tune with the function-based API on CIFAR-10 dataset.
- `pbt_tune_cifar10_with_keras <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py>`__: A contributed example of tuning a Keras model on CIFAR10 with the PopulationBasedTraining scheduler.
- `genetic_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/genetic_example.py>`__: Optimizing the Michalewicz function using the contributed GeneticSearch search algorithm with AsyncHyperBandScheduler.
- `tune_cifar10_gluon <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/tune_cifar10_gluon.py>`__: MXNet Gluon example to use Tune with the function-based API on the CIFAR-10 dataset.
@@ -0,0 +1,53 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import lightgbm as lgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
from ray import tune
def LightGBMCallback(env):
    """Report the first LightGBM evaluation result to Tune.

    Only the first entry of ``env.evaluation_result_list`` is read, which
    assumes that `valid_0` is the target validation score.

    Args:
        env: Callback environment; must expose ``evaluation_result_list``.
    """
    first_entry = env.evaluation_result_list[0]
    # Each entry unpacks into four fields; only the metric name and its
    # value are forwarded.
    _, metric_name, metric_value, _ = first_entry
    report = {metric_name: metric_value}
    tune.track.log(**report)
def train_breast_cancer(config):
    """Train a LightGBM model on the breast-cancer dataset and log to Tune.

    Args:
        config (dict): Parameters passed unchanged to ``lgb.train``.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Hold out 25% of the samples for validation.
    split = train_test_split(features, labels, test_size=0.25)
    train_x, test_x, train_y, test_y = split

    training_data = lgb.Dataset(train_x, label=train_y)
    validation_data = lgb.Dataset(test_x, label=test_y)
    booster = lgb.train(
        config,
        training_data,
        valid_sets=[validation_data],
        verbose_eval=False,
        callbacks=[LightGBMCallback])

    # Round the raw predictions to the nearest integer label before scoring.
    predicted_labels = np.rint(booster.predict(test_x))
    accuracy = sklearn.metrics.accuracy_score(test_y, predicted_labels)
    # Final report: log the held-out accuracy together with ``done=True``.
    tune.track.log(mean_accuracy=accuracy, done=True)
if __name__ == "__main__":
    from ray.tune.schedulers import ASHAScheduler

    # Fixed LightGBM settings shared by every trial.
    search_space = {
        "objective": "binary",
        "metric": "binary_error",
        "verbose": -1,
    }
    # Hyperparameters explored by Tune.
    search_space["boosting_type"] = tune.grid_search(["gbdt", "dart"])
    search_space["num_leaves"] = tune.randint(10, 1000)
    search_space["learning_rate"] = tune.loguniform(1e-8, 1e-1)

    # The scheduler minimises "binary_error", the same name set as the
    # LightGBM "metric" above.
    asha = ASHAScheduler(metric="binary_error", mode="min")
    tune.run(
        train_breast_cancer,
        config=search_space,
        num_samples=2,
        scheduler=asha)
@@ -0,0 +1,53 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import xgboost as xgb
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
from ray import tune
def XGBCallback(env):
    """Forward all entries of ``env.evaluation_result_list`` to Tune.

    Args:
        env: Callback environment; ``evaluation_result_list`` must be an
            iterable of ``(name, value)`` pairs.
    """
    metrics = {name: value for name, value in env.evaluation_result_list}
    tune.track.log(**metrics)
def train_breast_cancer(config):
    """Train an XGBoost model on the breast-cancer dataset and log to Tune.

    Args:
        config (dict): Parameters passed unchanged to ``xgb.train``.
    """
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Hold out 25% of the samples for evaluation.
    split = train_test_split(features, labels, test_size=0.25)
    train_x, test_x, train_y, test_y = split

    training_matrix = xgb.DMatrix(train_x, label=train_y)
    eval_matrix = xgb.DMatrix(test_x, label=test_y)
    # The evaluation set is registered under the name "eval".
    watchlist = [(eval_matrix, "eval")]
    booster = xgb.train(
        config, training_matrix, evals=watchlist, callbacks=[XGBCallback])

    # Round the raw predictions to the nearest integer label before scoring.
    predicted_labels = np.rint(booster.predict(eval_matrix))
    accuracy = sklearn.metrics.accuracy_score(test_y, predicted_labels)
    # Final report: log the held-out accuracy together with ``done=True``.
    tune.track.log(mean_accuracy=accuracy, done=True)
if __name__ == "__main__":
    # Threads each trial may use. The same value is passed to XGBoost and
    # requested from Tune through ``resources_per_trial`` below, so the two
    # stay in sync.
    num_threads = 2
    config = {
        "verbosity": 0,
        # XGBoost's thread-count parameter is ``nthread``; an unrecognised
        # key such as "num_threads" would leave XGBoost on its default
        # thread count while Tune only reserves ``num_threads`` CPUs.
        "nthread": num_threads,
        "objective": "binary:logistic",
        "booster": "gbtree",
        "eval_metric": ["auc", "ams@0", "logloss"],
        # Hyperparameters explored by Tune.
        "max_depth": tune.randint(1, 9),
        "eta": tune.loguniform(1e-4, 1e-1),
        "gamma": tune.loguniform(1e-8, 1.0),
        "grow_policy": tune.choice(["depthwise", "lossguide"])
    }

    from ray.tune.schedulers import ASHAScheduler
    tune.run(
        train_breast_cancer,
        resources_per_trial={"cpu": num_threads},
        config=config,
        num_samples=2,
        # NOTE(review): "eval-logloss" is presumably the key XGBoost reports
        # for the "eval" set's logloss metric -- confirm against the names
        # logged by XGBCallback.
        scheduler=ASHAScheduler(metric="eval-logloss", mode="min"))
+17
View File
@@ -56,6 +56,23 @@ def uniform(*args, **kwargs):
return sample_from(lambda _: np.random.uniform(*args, **kwargs))
def loguniform(min_bound, max_bound, base=10):
    """Sugar for sampling in different orders of magnitude.

    An exponent is drawn uniformly between ``log_base(min_bound)`` and
    ``log_base(max_bound)`` and ``base`` is raised to that exponent.

    Args:
        min_bound (float): Lower boundary of the output interval
            (e.g. 1e-4). Must be positive.
        max_bound (float): Upper boundary of the output interval
            (e.g. 1e-2). Must be positive.
        base (float): Base of the log. Defaults to 10. Must be positive
            and different from 1.

    Returns:
        The result of ``sample_from`` applied to the sampling function.

    Raises:
        ValueError: If a bound is not positive, or if ``base`` is not
            positive or equals 1 (the logarithm would be undefined).
    """
    # ``not x > 0`` (rather than ``x <= 0``) also rejects NaN inputs.
    if not min_bound > 0 or not max_bound > 0:
        raise ValueError(
            "loguniform requires positive bounds, got min_bound={} and "
            "max_bound={}.".format(min_bound, max_bound))
    if not base > 0 or base == 1:
        raise ValueError(
            "loguniform requires a positive base different from 1, "
            "got base={}.".format(base))

    # Change of base: log_base(x) = ln(x) / ln(base).
    logmin = np.log(min_bound) / np.log(base)
    logmax = np.log(max_bound) / np.log(base)

    def apply_log(_):
        return base**(np.random.uniform(logmin, logmax))

    return sample_from(apply_log)
def choice(*args, **kwargs):
    """Wrap ``np.random.choice`` in ``sample_from``.

    All positional and keyword arguments are forwarded unchanged to
    ``np.random.choice`` each time the sampling function is called.
    """

    def draw(_):
        return np.random.choice(*args, **kwargs)

    return sample_from(draw)
@@ -92,6 +92,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):
def on_trial_result(self, trial_runner, trial, result):
action = TrialScheduler.CONTINUE
if self._time_attr not in result or self._metric not in result:
return action
if result[self._time_attr] >= self._max_t:
action = TrialScheduler.STOP
else:
@@ -103,6 +105,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):
return action
def on_trial_complete(self, trial_runner, trial, result):
if self._time_attr not in result or self._metric not in result:
return
bracket = self._trial_info[trial.trial_id]
bracket.on_result(trial, result[self._time_attr],
self._metric_op * result[self._metric])
@@ -77,6 +77,8 @@ class MedianStoppingRule(FIFOScheduler):
value by step `t` is strictly worse than the median of the running
averages of all completed trials' objectives reported up to step `t`.
"""
if self._time_attr not in result or self._metric not in result:
return TrialScheduler.CONTINUE
if trial in self._stopped_trials:
assert not self._hard_stop
+2
View File
@@ -219,6 +219,8 @@ class PopulationBasedTraining(FIFOScheduler):
self._trial_state[trial] = PBTTrialState(trial)
def on_trial_result(self, trial_runner, trial, result):
if self._time_attr not in result or self._metric not in result:
return TrialScheduler.CONTINUE
time = result[self._time_attr]
state = self._trial_state[trial]
@@ -5,6 +5,7 @@ from __future__ import print_function
import copy
import glob
import os
import numpy as np
import shutil
import sys
import tempfile
@@ -1298,6 +1299,17 @@ class VariantGeneratorTest(unittest.TestCase):
self.assertEqual(trials[0].config, {"x": 100, "y": 1})
self.assertEqual(trials[1].config, {"x": 200, "y": 1})
def testLogUniform(self):
    """Check that ``tune.loguniform`` samples span the requested range.

    For base 10 and base e, 1000 samples are drawn and the log of the
    smallest and largest sample must lie within 0.1 of the log of the
    corresponding bound.
    """
    num_samples = 1000
    tolerance = 0.1

    # Base 10 (default): bounds 1e-10 .. 1e-1, i.e. exponents -10 .. -1.
    sampler = tune.loguniform(1e-10, 1e-1).func
    results = [sampler(None) for _ in range(num_samples)]
    self.assertLess(
        abs(np.log(min(results)) / np.log(10) - -10), tolerance)
    self.assertLess(
        abs(np.log(max(results)) / np.log(10) - -1), tolerance)

    # Natural base: bounds e**-4 .. e, i.e. exponents -4 .. 1.
    sampler_e = tune.loguniform(np.e**-4, np.e, base=np.e).func
    results_e = [sampler_e(None) for _ in range(num_samples)]
    self.assertLess(abs(np.log(min(results_e)) - -4), tolerance)
    self.assertLess(abs(np.log(max(results_e)) - 1), tolerance)
def test_resolve_dict(self):
config = {
"a": {