mirror of
https://github.com/wassname/ray.git
synced 2026-06-27 19:00:36 +08:00
[tune] Docs for tune-sklearn (#9129)
Co-authored-by: krfricke <krfricke@users.noreply.github.com>
This commit is contained in:
@@ -94,6 +94,9 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
|
||||
python /ray/python/ray/tune/examples/hyperopt_example.py \
|
||||
--smoke-test
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 $DOCKER_SHA \
|
||||
python /ray/doc/source/tune/_tutorials/tune-sklearn.py
|
||||
|
||||
# if [[ ! -z "$SIGOPT_KEY" ]]; then
|
||||
# $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} --memory-swap=-1 -e SIGOPT_KEY $DOCKER_SHA \
|
||||
# python /ray/python/ray/tune/examples/sigopt_example.py \
|
||||
|
||||
@@ -24,3 +24,4 @@ sphinx_rtd_theme
|
||||
tabulate
|
||||
uvicorn
|
||||
werkzeug
|
||||
git+https://github.com/ray-project/tune-sklearn.git#egg=tune-sklearn
|
||||
|
||||
+7
-3
@@ -25,8 +25,8 @@ import mock
|
||||
MOCK_MODULES = [
|
||||
"blist", "gym", "gym.spaces", "psutil", "ray._raylet",
|
||||
"ray.core.generated", "ray.core.generated.gcs_pb2",
|
||||
"ray.core.generated.ray.protocol.Task", "scipy", "scipy.signal",
|
||||
"scipy.stats", "setproctitle", "tensorflow_probability", "tensorflow",
|
||||
"ray.core.generated.ray.protocol.Task", "scipy.signal", "scipy.stats",
|
||||
"setproctitle", "tensorflow_probability", "tensorflow",
|
||||
"tensorflow.contrib", "tensorflow.contrib.all_reduce", "tree",
|
||||
"tensorflow.contrib.all_reduce.python", "tensorflow.contrib.layers",
|
||||
"tensorflow.contrib.rnn", "tensorflow.contrib.slim", "tensorflow.core",
|
||||
@@ -35,6 +35,9 @@ MOCK_MODULES = [
|
||||
"torch.nn.parallel", "torch.utils.data", "torch.utils.data.distributed",
|
||||
"zoopt"
|
||||
]
|
||||
import scipy.stats
|
||||
import scipy.linalg
|
||||
|
||||
for mod_name in MOCK_MODULES:
|
||||
sys.modules[mod_name] = mock.Mock()
|
||||
# ray.rllib.models.action_dist.py and
|
||||
@@ -80,7 +83,8 @@ versionwarning_messages = {
|
||||
|
||||
versionwarning_body_selector = "div.document"
|
||||
sphinx_gallery_conf = {
|
||||
"examples_dirs": ["../examples", "tune/_tutorials"], # path to example scripts
|
||||
"examples_dirs": ["../examples",
|
||||
"tune/_tutorials"], # path to example scripts
|
||||
# path where to save generated examples
|
||||
"gallery_dirs": ["auto_examples", "tune/tutorials"],
|
||||
"ignore_pattern": "../examples/doc_code/",
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 81 KiB |
@@ -1,10 +1,12 @@
|
||||
.. _ray-joblib:
|
||||
|
||||
Distributed Scikit-learn / Joblib
|
||||
=================================
|
||||
|
||||
.. _`issue on GitHub`: https://github.com/ray-project/ray/issues
|
||||
|
||||
Ray supports running distributed `scikit-learn`_ programs by
|
||||
implementing a Ray backend for `joblib`_ using `Ray Actors <actors.html>`__
|
||||
Ray supports running distributed `scikit-learn`_ programs by
|
||||
implementing a Ray backend for `joblib`_ using `Ray Actors <actors.html>`__
|
||||
instead of local processes. This makes it easy to scale existing applications
|
||||
that use scikit-learn from a single node to a cluster.
|
||||
|
||||
@@ -19,12 +21,12 @@ that use scikit-learn from a single node to a cluster.
|
||||
Quickstart
|
||||
----------
|
||||
|
||||
To get started, first `install Ray <installation.html>`__, then use
|
||||
To get started, first `install Ray <installation.html>`__, then use
|
||||
``from ray.util.joblib import register_ray`` and run ``register_ray()``.
|
||||
This will register Ray as a joblib backend for scikit-learn to use.
|
||||
Then run your original scikit-learn code inside
|
||||
``with joblib.parallel_backend('ray')``. This will start a local Ray cluster.
|
||||
See the `Run on a Cluster`_ section below for instructions to run on
|
||||
Then run your original scikit-learn code inside
|
||||
``with joblib.parallel_backend('ray')``. This will start a local Ray cluster.
|
||||
See the `Run on a Cluster`_ section below for instructions to run on
|
||||
a multi-node Ray cluster instead.
|
||||
|
||||
.. code-block:: python
|
||||
@@ -62,6 +64,6 @@ You can also start Ray manually by calling ``ray.init()`` (with any of its suppo
|
||||
configuration options) before calling ``with joblib.parallel_backend('ray')``.
|
||||
|
||||
.. warning::
|
||||
|
||||
|
||||
If you do not set the ``RAY_ADDRESS`` environment variable and do not provide
|
||||
``address`` in ``ray.init(address=<address>)`` then scikit-learn will run on a SINGLE node!
|
||||
|
||||
@@ -46,9 +46,9 @@ These pages will demonstrate the various features and configurations of Tune.
|
||||
<div class="sphx-glr-bigcontainer">
|
||||
|
||||
.. customgalleryitem::
|
||||
:tooltip: A guide to Tune features.
|
||||
:tooltip: Tune User Guide
|
||||
:figure: /images/tune.png
|
||||
:description: :doc:`A guide to Tune features <tune-usage>`
|
||||
:description: :doc:`Tune User Guide <tune-usage>`
|
||||
|
||||
.. customgalleryitem::
|
||||
:tooltip: A simple guide to Population-based Training
|
||||
@@ -60,6 +60,11 @@ These pages will demonstrate the various features and configurations of Tune.
|
||||
:figure: /images/tune.png
|
||||
:description: :doc:`A guide to distributed hyperparameter tuning <tune-distributed>`
|
||||
|
||||
.. customgalleryitem::
|
||||
:tooltip: Tune's Scikit-Learn Adapters
|
||||
:figure: /images/tune-sklearn.png
|
||||
:description: :doc:`Tune's Scikit-Learn Adapters <tune-sklearn>`
|
||||
|
||||
.. customgalleryitem::
|
||||
:tooltip: Tuning PyTorch Lightning modules
|
||||
:figure: /images/pytorch_lightning_small.png
|
||||
@@ -81,6 +86,7 @@ These pages will demonstrate the various features and configurations of Tune.
|
||||
tune-usage.rst
|
||||
tune-advanced-tutorial.rst
|
||||
tune-distributed.rst
|
||||
tune-sklearn.rst
|
||||
tune-pytorch-lightning.rst
|
||||
tune-xgboost.rst
|
||||
|
||||
@@ -145,6 +151,7 @@ General Examples
|
||||
- `async_hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/async_hyperband_example.py>`__: Example of using a Trainable class with AsyncHyperBandScheduler.
|
||||
- `hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__: Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration. Also uses the AsyncHyperBandScheduler.
|
||||
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
|
||||
- `PBT with Function API <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_function.py>`__: Example of using the function API with a PopulationBasedTraining scheduler.
|
||||
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
|
||||
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
|
||||
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
# flake8: noqa
|
||||
"""
|
||||
Tune's Scikit-Learn Adapters
|
||||
============================
|
||||
|
||||
Scikit-Learn is one of the most widely used tools in the ML community for working with data, offering dozens of easy-to-use machine learning algorithms. However, to achieve high performance for these algorithms, you often need to perform **model selection**.
|
||||
|
||||
|
||||
.. image:: /images/tune-sklearn.png
|
||||
:align: center
|
||||
:width: 50%
|
||||
|
||||
Scikit-Learn `has an existing module for model selection <https://scikit-learn.org/stable/modules/grid_search.html>`_, but the algorithms offered (Grid Search/``GridSearchCV`` and Random Search/``RandomizedSearchCV``) are often considered inefficient. In this tutorial, we'll cover ``tune-sklearn``, a drop-in replacement for Scikit-Learn's model selection module with state-of-the-art optimization features such as early stopping and Bayesian Optimization.
|
||||
|
||||
.. tip:: Check out the `tune-sklearn code`_ and :ref:`documentation <tune-sklearn-docs>`.
|
||||
|
||||
.. _`tune-sklearn code`: https://github.com/ray-project/tune-sklearn
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
``tune-sklearn`` is a module that integrates Ray Tune's hyperparameter tuning and scikit-learn's Classifier API. ``tune-sklearn`` has two APIs: :ref:`TuneSearchCV <tunesearchcv-docs>`, and :ref:`TuneGridSearchCV <tunegridsearchcv-docs>`. They are drop-in replacements for Scikit-learn's RandomizedSearchCV and GridSearchCV, so you only need to change fewer than 5 lines in a standard Scikit-Learn script to use the API.
|
||||
|
||||
Ray Tune's Scikit-learn APIs allow you to easily leverage Bayesian Optimization, HyperBand, and other cutting-edge tuning techniques by simply toggling a few parameters. They also support and provide examples for many other frameworks with Scikit-Learn wrappers such as Skorch (PyTorch), KerasClassifiers (Keras), and XGBoostClassifiers (XGBoost).
|
||||
|
||||
Run ``pip install ray[tune] tune-sklearn`` to get started.
|
||||
|
||||
Walkthrough
|
||||
-----------
|
||||
|
||||
Let's compare Tune's Scikit-Learn APIs to the standard scikit-learn GridSearchCV. For this example, we'll be using ``TuneGridSearchCV`` with a `SGDClassifier`_.
|
||||
|
||||
.. _`digits dataset`: https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html
|
||||
.. _`SGDClassifier`: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html
|
||||
|
||||
To start out, change the import statement to get tune-sklearn’s grid search cross validation interface:
|
||||
|
||||
"""
|
||||
# from sklearn.model_selection import GridSearchCV
|
||||
from ray.tune.sklearn import TuneGridSearchCV
|
||||
|
||||
#######################################################################
|
||||
# And from there, we would proceed just like how we would in Scikit-Learn’s interface!
|
||||
#
|
||||
# The `SGDClassifier`_ has a ``partial_fit`` API, which enables it to stop fitting to the data for a certain hyperparameter configuration.
|
||||
# If the estimator does not support early stopping, we would fall back to a parallel grid search.
|
||||
|
||||
# Other imports
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn.datasets import make_classification
|
||||
import numpy as np
|
||||
|
||||
# Create dataset
|
||||
X, y = make_classification(
|
||||
n_samples=11000,
|
||||
n_features=1000,
|
||||
n_informative=50,
|
||||
n_redundant=0,
|
||||
n_classes=10,
|
||||
class_sep=2.5)
|
||||
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=1000)
|
||||
|
||||
# Example parameters to tune from SGDClassifier
|
||||
parameter_grid = {"alpha": [1e-4, 1e-1, 1], "epsilon": [0.01, 0.1]}
|
||||
|
||||
#######################################################################
|
||||
# As you can see, the setup here is exactly how you would do it for Scikit-Learn. Now, let's try fitting a model.
|
||||
|
||||
tune_search = TuneGridSearchCV(
|
||||
SGDClassifier(),
|
||||
parameter_grid,
|
||||
early_stopping=True,
|
||||
max_iters=10)
|
||||
|
||||
import time # Just to compare fit times
|
||||
start = time.time()
|
||||
tune_search.fit(x_train, y_train)
|
||||
end = time.time()
|
||||
print("Tune GridSearch Fit Time:", end - start)
|
||||
# Tune GridSearch Fit Time: 15.436315774917603 (for an 8 core laptop)
|
||||
|
||||
#######################################################################
|
||||
# Note the slight differences we introduced above:
|
||||
#
|
||||
# * an ``early_stopping`` parameter, and
|
||||
# * a specification of the ``max_iters`` parameter
|
||||
#
|
||||
# The ``early_stopping`` parameter allows us to terminate unpromising configurations. If ``early_stopping=True``,
|
||||
# TuneGridSearchCV will default to using Tune's ASHAScheduler. You can pass in a custom
|
||||
# algorithm - see :ref:`Tune's documentation on schedulers <tune-schedulers>` for a full list to choose from.
|
||||
# ``max_iters`` is the maximum number of iterations a given hyperparameter set could run for; it may run for fewer iterations if it is early stopped.
|
||||
#
|
||||
# Try running this compared to the GridSearchCV equivalent, and see the speedup for yourself!
|
||||
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
# n_jobs=-1 enables use of all cores like Tune does
|
||||
sklearn_search = GridSearchCV(SGDClassifier(), parameter_grid, n_jobs=-1)
|
||||
|
||||
start = time.time()
|
||||
sklearn_search.fit(x_train, y_train)
|
||||
end = time.time()
|
||||
print("Sklearn Fit Time:", end - start)
|
||||
# Sklearn Fit Time: 47.48055911064148 (for an 8 core laptop)
|
||||
|
||||
###################################################################
|
||||
# Using Bayesian Optimization
|
||||
# ---------------------------
|
||||
#
|
||||
# In addition to the grid search interface, tune-sklearn also provides an interface, TuneSearchCV, for sampling from **distributions of hyperparameters**.
|
||||
#
|
||||
# In addition, you can easily enable Bayesian optimization over the distributions in only 2 lines of code:
|
||||
|
||||
# First run `pip install bayesian-optimization`
|
||||
from ray.tune.sklearn import TuneSearchCV
|
||||
from sklearn.linear_model import SGDClassifier
|
||||
from sklearn import datasets
|
||||
from sklearn.model_selection import train_test_split
|
||||
import numpy as np
|
||||
|
||||
digits = datasets.load_digits()
|
||||
x = digits.data
|
||||
y = digits.target
|
||||
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.2)
|
||||
|
||||
clf = SGDClassifier()
|
||||
parameter_grid = {"alpha": (1e-4, 1), "epsilon": (0.01, 0.1)}
|
||||
|
||||
tune_search = TuneSearchCV(
|
||||
clf,
|
||||
parameter_grid,
|
||||
search_optimization="bayesian",
|
||||
n_iter=3,
|
||||
early_stopping=True,
|
||||
max_iters=10,
|
||||
)
|
||||
tune_search.fit(x_train, y_train)
|
||||
print(tune_search.best_params_)
|
||||
# {'alpha': 0.37460266483547777, 'epsilon': 0.09556428757689246}
|
||||
|
||||
################################################################
|
||||
# As you can see, it’s very simple to integrate tune-sklearn into existing code. Distributed execution is also easy - you can simply run ``ray.init(address="auto")`` before
|
||||
# TuneSearchCV to connect to the Ray cluster and parallelize tuning across multiple nodes, as you would in any other Ray Tune script.
|
||||
#
|
||||
#
|
||||
# Code Examples
|
||||
# -------------
|
||||
#
|
||||
# Check out more detailed examples and get started with tune-sklearn!
|
||||
#
|
||||
# * `Skorch with tune-sklearn <https://github.com/ray-project/tune-sklearn/blob/master/examples/torch_nn.py>`_
|
||||
# * `Scikit-Learn Pipelines with tune-sklearn <https://github.com/ray-project/tune-sklearn/blob/master/examples/sklearn_pipeline.py>`_
|
||||
# * `XGBoost with tune-sklearn <https://github.com/ray-project/tune-sklearn/blob/master/examples/xgbclassifier.py>`_
|
||||
# * `KerasClassifier with tune-sklearn <https://github.com/ray-project/tune-sklearn/blob/master/examples/keras_example.py>`_
|
||||
# * `LightGBM with tune-sklearn <https://github.com/ray-project/tune-sklearn/blob/master/examples/lgbm.py>`_
|
||||
#
|
||||
#
|
||||
# Further Reading
|
||||
# ---------------
|
||||
#
|
||||
# If you're using scikit-learn for other tasks, take a look at Ray’s :ref:`replacement for joblib <ray-joblib>`, which allows users to parallelize scikit-learn jobs over multiple nodes.
|
||||
@@ -1,5 +1,5 @@
|
||||
Training (tune.run, tune.Experiment)
|
||||
====================================
|
||||
Execution (tune.run, tune.Experiment)
|
||||
=====================================
|
||||
|
||||
.. _tune-run-ref:
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ on `Github`_.
|
||||
grid_random.rst
|
||||
suggestion.rst
|
||||
schedulers.rst
|
||||
sklearn.rst
|
||||
logging.rst
|
||||
internals.rst
|
||||
client.rst
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
.. _tune-sklearn-docs:
|
||||
|
||||
Scikit-Learn API (tune.sklearn)
|
||||
================================
|
||||
|
||||
.. _tunegridsearchcv-docs:
|
||||
|
||||
.. autoclass:: ray.tune.sklearn.TuneGridSearchCV
|
||||
:inherited-members:
|
||||
|
||||
.. _tunesearchcv-docs:
|
||||
|
||||
.. autoclass:: ray.tune.sklearn.TuneSearchCV
|
||||
:inherited-members:
|
||||
@@ -32,3 +32,4 @@ xgboost
|
||||
zoopt>=0.4.0
|
||||
timm
|
||||
dataclasses
|
||||
git+https://github.com/ray-project/tune-sklearn.git#egg=tune-sklearn
|
||||
|
||||
@@ -14,6 +14,7 @@ General Examples
|
||||
- `async_hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/async_hyperband_example.py>`__: Example of using a Trainable class with AsyncHyperBandScheduler.
|
||||
- `hyperband_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/hyperband_example.py>`__: Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration. Also uses the AsyncHyperBandScheduler.
|
||||
- `pbt_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_example.py>`__: Example of using a Trainable class with PopulationBasedTraining scheduler.
|
||||
- `PBT with Function API <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_function.py>`__: Example of using the function API with a PopulationBasedTraining scheduler.
|
||||
- `pbt_ppo_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/pbt_ppo_example.py>`__: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler.
|
||||
- `logging_example <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__: Example of custom loggers and custom trial directory naming.
|
||||
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
TuneSearchCV = None
|
||||
TuneGridSearchCV = None
|
||||
|
||||
try:
|
||||
from tune_sklearn import TuneSearchCV, TuneGridSearchCV
|
||||
except ImportError:
|
||||
logger.info("tune_sklearn is not installed. Please run "
|
||||
"`pip install tune-sklearn`.")
|
||||
|
||||
__all__ = ["TuneSearchCV", "TuneGridSearchCV"]
|
||||
Reference in New Issue
Block a user