diff --git a/doc/source/tune/examples/cifar10_pytorch.rst b/doc/source/tune/examples/cifar10_pytorch.rst new file mode 100644 index 000000000..de3eef3c8 --- /dev/null +++ b/doc/source/tune/examples/cifar10_pytorch.rst @@ -0,0 +1,6 @@ +:orphan: + +cifar10_pytorch +~~~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/cifar10_pytorch.py diff --git a/doc/source/tune/examples/durable_trainable_example.rst b/doc/source/tune/examples/durable_trainable_example.rst new file mode 100644 index 000000000..1a64dda28 --- /dev/null +++ b/doc/source/tune/examples/durable_trainable_example.rst @@ -0,0 +1,6 @@ +:orphan: + +durable_trainable_example +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/durable_trainable_example.py diff --git a/doc/source/tune/examples/hyperband_function_example.rst b/doc/source/tune/examples/hyperband_function_example.rst new file mode 100644 index 000000000..292bd45a8 --- /dev/null +++ b/doc/source/tune/examples/hyperband_function_example.rst @@ -0,0 +1,6 @@ +:orphan: + +hyperband_function_example +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/hyperband_function_example.py diff --git a/doc/source/tune/examples/index.rst b/doc/source/tune/examples/index.rst index 3808e80c3..89af9deb9 100644 --- a/doc/source/tune/examples/index.rst +++ b/doc/source/tune/examples/index.rst @@ -11,84 +11,139 @@ If any example is broken, or if you'd like to add an example to this page, feel .. _tune-general-examples: General Examples -~~~~~~~~~~~~~~~~ +---------------- -- :doc:`/tune/examples/async_hyperband_example`: Example of using a Trainable class with AsyncHyperBandScheduler. -- :doc:`/tune/examples/hyperband_example`: Example of using a Trainable class with HyperBandScheduler. Also uses the Experiment class API for specifying the experiment configuration. Also uses the AsyncHyperBandScheduler. 
-- :doc:`/tune/examples/pbt_example`: Example of using a Trainable class with PopulationBasedTraining scheduler. + +- :doc:`/tune/examples/async_hyperband_example`: Example of using a simple tuning function with AsyncHyperBandScheduler. +- :doc:`/tune/examples/hyperband_function_example`: Example of using a Trainable function with HyperBandScheduler. Also uses the AsyncHyperBandScheduler. - :doc:`/tune/examples/pbt_function`: Example of using the function API with a PopulationBasedTraining scheduler. -- :doc:`/tune/examples/pbt_ppo_example`: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler. -- :doc:`/tune/examples/pb2_ppo_example`: Example of optimizing a distributed RLlib algorithm (PPO) with the PB2 scheduler. Uses a small population size of 4, so can train on a laptop. +- :doc:`/tune/examples/pb2_example`: Example of using the Population-based Bandits (PB2) scheduler. - :doc:`/tune/examples/logging_example`: Example of custom loggers and custom trial directory naming. +**Trainable Class Examples** + +Though it is preferable to use the Function API, Tune also supports a Class-based API for training. + +- :doc:`/tune/examples/hyperband_example`: Example of using a Trainable class with HyperBandScheduler. Also uses the AsyncHyperBandScheduler. +- :doc:`/tune/examples/pbt_example`: Example of using a Trainable class with PopulationBasedTraining scheduler. + +.. - :doc:`/tune/examples/durable_trainable_example`: Example using a durable storage mechanism in the Trainable. + + Search Algorithm Examples -~~~~~~~~~~~~~~~~~~~~~~~~~ +------------------------- -- :doc:`/tune/examples/ax_example`: Optimize a Hartmann function with `Ax `_ with 4 parallel workers. -- :doc:`/tune/examples/hyperopt_example`: Optimizes a basic function using the function-based API and the HyperOptSearch (SearchAlgorithm wrapper for HyperOpt TPE). 
-- :doc:`/tune/examples/nevergrad_example`: Optimize a simple toy function with the gradient-free optimization package `Nevergrad `_ with 4 parallel workers. -- :doc:`/tune/examples/bayesopt_example`: Optimize a simple toy function using `Bayesian Optimization `_ with 4 parallel workers. +- :doc:`/tune/examples/ax_example`: Example script showing usage of :ref:`AxSearch ` [`Ax website `__] +- :doc:`/tune/examples/dragonfly_example`: Example script showing usage of :ref:`DragonflySearch ` [`Dragonfly website `__] +- :doc:`/tune/examples/skopt_example`: Example script showing usage of :ref:`SkoptSearch ` [`Scikit-Optimize website `__] +- :doc:`/tune/examples/hyperopt_example`: Example script showing usage of :ref:`HyperOptSearch ` [`HyperOpt website `__] +- :doc:`/tune/examples/bayesopt_example`: Example script showing usage of :ref:`BayesOptSearch ` [`BayesianOptimization website `__] +- :doc:`/tune/examples/bohb_example`: Example script showing usage of :ref:`TuneBOHB ` [`BOHB website `__] +- :doc:`/tune/examples/nevergrad_example`: Example script showing usage of :ref:`NevergradSearch ` [`Nevergrad website `__] +- :doc:`/tune/examples/optuna_example`: Example script showing usage of :ref:`OptunaSearch ` [`Optuna website `__] +- :doc:`/tune/examples/zoopt_example`: Example script showing usage of :ref:`ZOOptSearch ` [`ZOOpt website `__] +- :doc:`/tune/examples/sigopt_example`: Example script showing usage of :ref:`SigOptSearch ` [`SigOpt website `__] -Tensorflow/Keras Examples -~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Sigopt (Contributed)** + +- :doc:`/tune/examples/sigopt_multi_objective_example`: Example using Sigopt's multi-objective functionality. +- :doc:`/tune/examples/sigopt_prior_beliefs_example`: Example using Sigopt's support for prior beliefs. + + +tune-sklearn examples +--------------------- + +See the `ray-project/tune-sklearn examples `__ for a comprehensive list of examples leveraging Tune's sklearn interface. 
+ +- `tune-sklearn with xgboost `__ +- `tune-sklearn with sklearn pipelines `__ +- `tune-sklearn with Bayesian Optimization `__ + + +Framework-specific Examples +--------------------------- + +PyTorch +~~~~~~~ + +- :doc:`/tune/examples/mnist_pytorch`: Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune. +- :doc:`/tune/examples/ddp_mnist_torch`: An example showing how to use DistributedDataParallel with Ray Tune. This enables both distributed training and distributed hyperparameter tuning. +- :doc:`/tune/examples/cifar10_pytorch`: Uses Pytorch to tune a simple model on CIFAR10. +- :doc:`/tune/examples/pbt_convnet_function_example`: Example training a ConvNet with checkpointing in function API. + +.. - :doc:`/tune/examples/pbt_convnet_example`: Example of training a Memory NN on bAbI with Keras using PBT. +.. - :doc:`/tune/examples/mnist_pytorch_trainable`: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end. + +Pytorch Lightning +~~~~~~~~~~~~~~~~~ + +- :doc:`/tune/examples/mnist_ptl_mini`: A minimal example of using `Pytorch Lightning `_ to train a MNIST model. This example utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks `. See also :ref:`this tutorial for a full walkthrough `. +- :doc:`/tune/examples/mnist_pytorch_lightning`: A comprehensive example using `Pytorch Lightning `_ to train a MNIST model. This example showcases how to use various search optimization techniques. It utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks `. +- :ref:`A walkthrough tutorial for using Ray Tune with Pytorch-Lightning `. + +Wandb, MLFlow +~~~~~~~~~~~~~ + +- :ref:`Tutorial ` for using `wandb `__ with Ray Tune +- :doc:`/tune/examples/wandb_example`: Example for using `Weights and Biases `__ with Ray Tune. 
+- :doc:`/tune/examples/mlflow_example`: Example for using `MLFlow `__ with Ray Tune. + +Tensorflow/Keras +~~~~~~~~~~~~~~~~ - :doc:`/tune/examples/tune_mnist_keras`: Converts the Keras MNIST example to use Tune with the function-based API and a Keras callback. Also shows how to easily convert something relying on argparse to use Tune. - :doc:`/tune/examples/pbt_memnn_example`: Example of training a Memory NN on bAbI with Keras using PBT. - :doc:`/tune/examples/tf_mnist_example`: Converts the Advanced TF2.0 MNIST example to use Tune with the Trainable. This uses `tf.function`. Original code from tensorflow: https://www.tensorflow.org/tutorials/quickstart/advanced -Horovod Example -~~~~~~~~~~~~~~~ +MXNet +~~~~~ + +- :doc:`/tune/examples/mxnet_example`: Simple example for using MXNet with Tune. +- :doc:`/tune/examples/tune_cifar10_gluon`: MXNet Gluon example to use Tune with the function-based API on CIFAR-10 dataset. + + +Horovod +~~~~~~~ + - :doc:`/tune/examples/horovod_simple`: Leverages the :ref:`Horovod-Tune ` integration to launch a distributed training + tuning job. - -PyTorch Examples -~~~~~~~~~~~~~~~~ - -- :doc:`/tune/examples/mnist_pytorch`: Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune. -- :doc:`/tune/examples/mnist_pytorch_trainable`: Converts the PyTorch MNIST example to use Tune with Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end. -- :doc:`/tune/examples/ddp_mnist_torch`: An example showing how to use DistributedDataParallel with Ray Tune. This enables both distributed training and distributed hyperparameter tuning. - -Pytorch Lightning Examples -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -- :doc:`/tune/examples/mnist_ptl_mini`: A minimal example of using `Pytorch Lightning `_ to train a MNIST model. This example utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks `. 
See also :ref:`this tutorial for a full walkthrough `. -- :doc:`/tune/examples/mnist_pytorch_lightning`: A comprehensive example using `Pytorch Lightning `_ to train a MNIST model. This example showcases how to use various search optimization techniques. It utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks `. See also :ref:`this tutorial for a full walkthrough `. - - -XGBoost Example -~~~~~~~~~~~~~~~ +XGBoost, LightGBM +~~~~~~~~~~~~~~~~~ - :ref:`XGBoost tutorial `: A guide to tuning XGBoost parameters with Tune. - :doc:`/tune/examples/xgboost_example`: Trains a basic XGBoost model with Tune with the function-based API and an XGBoost callback. - - -LightGBM Example -~~~~~~~~~~~~~~~~ - - :doc:`/tune/examples/lightgbm_example`: Trains a basic LightGBM model with Tune with the function-based API and a LightGBM callback. -|:hugging_face:| Huggingface Transformers Example -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RLlib +~~~~~ + +- :doc:`/tune/examples/pbt_ppo_example`: Example of optimizing a distributed RLlib algorithm (PPO) with the PopulationBasedTraining scheduler. +- :doc:`/tune/examples/pb2_ppo_example`: Example of optimizing a distributed RLlib algorithm (PPO) with the PB2 scheduler. Uses a small population size of 4, so can train on a laptop. + + +|:hugging_face:| Huggingface Transformers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - :doc:`/tune/examples/pbt_transformers`: Fine-tunes a Huggingface transformer with Tune Population Based Training. Contributed Examples -~~~~~~~~~~~~~~~~~~~~ +-------------------- - :doc:`/tune/examples/pbt_tune_cifar10_with_keras`: A contributed example of tuning a Keras model on CIFAR10 with the PopulationBasedTraining scheduler. - :doc:`/tune/examples/genetic_example`: Optimizing the michalewicz function using the contributed GeneticSearch algorithm with AsyncHyperBandScheduler. -- :doc:`/tune/examples/tune_cifar10_gluon`: MXNet Gluon example to use Tune with the function-based API on CIFAR-10 dataset. 
+ Open Source Projects using Tune ------------------------------- Here are some of the popular open source repositories and research projects that leverage Tune. Feel free to submit a pull-request adding (or requesting a removal!) of a listed project. - - `Softlearning `_: Softlearning is a reinforcement learning framework for training maximum entropy policies in continuous domains. Includes the official implementation of the Soft Actor-Critic algorithm. - - `Flambe `_: An ML framework to accelerate research and its path to production. See `flambe.ai `_. - - `Population Based Augmentation `_: Population Based Augmentation (PBA) is a algorithm that quickly and efficiently learns data augmentation functions for neural network training. PBA matches state-of-the-art results on CIFAR with one thousand times less compute. - - `Fast AutoAugment by Kakao `_: Fast AutoAugment (Accepted at NeurIPS 2019) learns augmentation policies using a more efficient search strategy based on density matching. - - `Allentune `_: Hyperparameter Search for AllenNLP from AllenAI. - - `machinable `_: A modular configuration system for machine learning research. See `machinable.org `_. - - `NeuroCard `_: NeuroCard (Accepted at VLDB 2021) is a neural cardinality estimator for multi-table join queries. It uses state of the art deep density models to learn correlations across relational database tables. +- `Softlearning `_: Softlearning is a reinforcement learning framework for training maximum entropy policies in continuous domains. Includes the official implementation of the Soft Actor-Critic algorithm. +- `Flambe `_: An ML framework to accelerate research and its path to production. See `flambe.ai `_. +- `Population Based Augmentation `_: Population Based Augmentation (PBA) is an algorithm that quickly and efficiently learns data augmentation functions for neural network training. PBA matches state-of-the-art results on CIFAR with one thousand times less compute. 
+- `Fast AutoAugment by Kakao `_: Fast AutoAugment (Accepted at NeurIPS 2019) learns augmentation policies using a more efficient search strategy based on density matching. +- `Allentune `_: Hyperparameter Search for AllenNLP from AllenAI. +- `machinable `_: A modular configuration system for machine learning research. See `machinable.org `_. +- `NeuroCard `_: NeuroCard (Accepted at VLDB 2021) is a neural cardinality estimator for multi-table join queries. It uses state of the art deep density models to learn correlations across relational database tables. diff --git a/doc/source/tune/examples/mxnet_example.rst b/doc/source/tune/examples/mxnet_example.rst new file mode 100644 index 000000000..190063e93 --- /dev/null +++ b/doc/source/tune/examples/mxnet_example.rst @@ -0,0 +1,6 @@ +:orphan: + +mxnet_example +~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/mxnet_example.py diff --git a/doc/source/tune/examples/pbt_convnet_function_example.rst b/doc/source/tune/examples/pbt_convnet_function_example.rst new file mode 100644 index 000000000..d8221ea3a --- /dev/null +++ b/doc/source/tune/examples/pbt_convnet_function_example.rst @@ -0,0 +1,6 @@ +:orphan: + +pbt_convnet_function_example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/pbt_convnet_function_example.py diff --git a/doc/source/tune/examples/sigopt_multi_objective_example.rst b/doc/source/tune/examples/sigopt_multi_objective_example.rst new file mode 100644 index 000000000..203f13d33 --- /dev/null +++ b/doc/source/tune/examples/sigopt_multi_objective_example.rst @@ -0,0 +1,6 @@ +:orphan: + +sigopt_multi_objective_example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
literalinclude:: /../../python/ray/tune/examples/sigopt_multi_objective_example.py diff --git a/doc/source/tune/examples/sigopt_prior_beliefs_example.rst b/doc/source/tune/examples/sigopt_prior_beliefs_example.rst new file mode 100644 index 000000000..c3d22ccc2 --- /dev/null +++ b/doc/source/tune/examples/sigopt_prior_beliefs_example.rst @@ -0,0 +1,6 @@ +:orphan: + +sigopt_prior_beliefs_example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: /../../python/ray/tune/examples/sigopt_prior_beliefs_example.py diff --git a/python/ray/tune/examples/async_hyperband_example.py b/python/ray/tune/examples/async_hyperband_example.py index bd4dc4c59..a7646234c 100644 --- a/python/ray/tune/examples/async_hyperband_example.py +++ b/python/ray/tune/examples/async_hyperband_example.py @@ -1,45 +1,27 @@ #!/usr/bin/env python import argparse -import json -import os -import random - -import numpy as np +import time import ray -from ray.tune import Trainable, run, sample_from +from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler -class MyTrainableClass(Trainable): - """Example agent whose learning curve is a random sigmoid. +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100)**(-1) + height * 0.1 - The dummy hyperparameters "width" and "height" determine the slope and - maximum reward value reached. - """ - def setup(self, config): - self.timestep = 0 +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] - def step(self): - self.timestep += 1 - v = np.tanh(float(self.timestep) / self.config.get("width", 1)) - v *= self.config.get("height", 1) - - # Here we use `episode_reward_mean`, but you can also report other - # objectives such as loss or accuracy. 
- return {"episode_reward_mean": v} - - def save_checkpoint(self, checkpoint_dir): - path = os.path.join(checkpoint_dir, "checkpoint") - with open(path, "w") as f: - f.write(json.dumps({"timestep": self.timestep})) - return path - - def load_checkpoint(self, checkpoint_path): - with open(checkpoint_path) as f: - self.timestep = json.loads(f.read())["timestep"] + for step in range(config["steps"]): + # Iterative training function - can be an arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back to Tune. + tune.report(iterations=step, mean_loss=intermediate_score) if __name__ == "__main__": @@ -48,31 +30,33 @@ if __name__ == "__main__": "--smoke-test", action="store_true", help="Finish quickly for testing") parser.add_argument( "--ray-address", - help="Address of Ray cluster for seamless distributed execution.") + help="Address of Ray cluster for seamless distributed execution.", + required=False) args, _ = parser.parse_known_args() ray.init(address=args.ray_address) - # asynchronous hyperband early stopping, configured with - # `episode_reward_mean` as the - # objective and `training_iteration` as the time unit, - # which is automatically filled by Tune. - ahb = AsyncHyperBandScheduler( - time_attr="training_iteration", - metric="episode_reward_mean", - mode="max", - grace_period=5, - max_t=100) + # AsyncHyperBand enables aggressive early stopping of bad trials. 
+ scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100) - run(MyTrainableClass, + # 'training_iteration' is incremented every time `trainable.step` is called + stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999} + + analysis = tune.run( + easy_objective, name="asynchyperband_test", - scheduler=ahb, - stop={"training_iteration": 1 if args.smoke_test else 99999}, + metric="mean_loss", + mode="min", + scheduler=scheduler, + stop=stopping_criteria, num_samples=20, + verbose=1, resources_per_trial={ "cpu": 1, "gpu": 0 }, - config={ - "width": sample_from(lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + config={ # Hyperparameter space + "steps": 100, + "width": tune.uniform(10, 100), + "height": tune.uniform(0, 100), }) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/ax_example.py b/python/ray/tune/examples/ax_example.py index 1bcb401a9..174e51691 100644 --- a/python/ray/tune/examples/ax_example.py +++ b/python/ray/tune/examples/ax_example.py @@ -1,11 +1,10 @@ -"""This test checks that AxSearch is functional. +"""This example demonstrates the usage of AxSearch with Ray Tune. It also checks that it is usable with a separate scheduler. """ import numpy as np import time -import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.ax import AxSearch @@ -52,11 +51,21 @@ if __name__ == "__main__": "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 50, - "config": { + algo = AxSearch( + max_concurrent=4, + parameter_constraints=["x1 + x2 <= 2.0"], # Optional. + outcome_constraints=["l2norm <= 1.25"], # Optional. 
+ ) + scheduler = AsyncHyperBandScheduler() + analysis = tune.run( + easy_objective, + name="ax", + metric="hartmann6", # provided in the 'easy_objective' function + mode="min", + search_alg=algo, + scheduler=scheduler, + num_samples=10 if args.smoke_test else 50, + config={ "iterations": 100, "x1": tune.uniform(0.0, 1.0), "x2": tune.uniform(0.0, 1.0), @@ -65,21 +74,6 @@ if __name__ == "__main__": "x5": tune.uniform(0.0, 1.0), "x6": tune.uniform(0.0, 1.0), }, - "stop": { - "timesteps_total": 100 - } - } - algo = AxSearch( - max_concurrent=4, - metric="hartmann6", - mode="min", - parameter_constraints=["x1 + x2 <= 2.0"], # Optional. - outcome_constraints=["l2norm <= 1.25"], # Optional. - ) - scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="min") - tune.run( - easy_objective, - name="ax", - search_alg=algo, - scheduler=scheduler, - **tune_kwargs) + stop={"timesteps_total": 100}) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/bayesopt_example.py b/python/ray/tune/examples/bayesopt_example.py index 1d0e11212..97a9e7fa5 100644 --- a/python/ray/tune/examples/bayesopt_example.py +++ b/python/ray/tune/examples/bayesopt_example.py @@ -1,10 +1,9 @@ -"""This test checks that BayesOpt is functional. +"""This example demonstrates the usage of BayesOpt with Ray Tune. It also checks that it is usable with a separate scheduler. 
""" import time -import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest import ConcurrencyLimiter @@ -34,16 +33,7 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 1000, - "config": { - "steps": 100, - "width": tune.uniform(0, 20), - "height": tune.uniform(-100, 100) - } - } algo = BayesOptSearch(utility_kwargs={ "kind": "ucb", "kappa": 2.5, @@ -51,11 +41,18 @@ if __name__ == "__main__": }) algo = ConcurrencyLimiter(algo, max_concurrent=4) scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, name="my_exp", metric="mean_loss", mode="min", search_alg=algo, scheduler=scheduler, - **tune_kwargs) + num_samples=10 if args.smoke_test else 1000, + config={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100) + }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/bohb_example.py b/python/ray/tune/examples/bohb_example.py index 2c01b64f4..dfe4debca 100644 --- a/python/ray/tune/examples/bohb_example.py +++ b/python/ray/tune/examples/bohb_example.py @@ -63,24 +63,22 @@ if __name__ == "__main__": # CS.CategoricalHyperparameter( # "activation", choices=["relu", "tanh"])) - experiment_metrics = dict(metric="episode_reward_mean", mode="max") - bohb_hyperband = HyperBandForBOHB( - time_attr="training_iteration", - max_t=100, - reduction_factor=4, - **experiment_metrics) + time_attr="training_iteration", max_t=100, reduction_factor=4) bohb_search = TuneBOHB( # space=config_space, # If you want to set the space manually - max_concurrent=4, - **experiment_metrics) + max_concurrent=4) - tune.run( + analysis = tune.run( MyTrainableClass, name="bohb_test", config=config, scheduler=bohb_hyperband, search_alg=bohb_search, 
num_samples=10, - stop={"training_iteration": 100}) + stop={"training_iteration": 100}, + metric="episode_reward_mean", + mode="max") + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/cifar10_pytorch.py b/python/ray/tune/examples/cifar10_pytorch.py index 4af8c4937..7cc59029d 100644 --- a/python/ray/tune/examples/cifar10_pytorch.py +++ b/python/ray/tune/examples/cifar10_pytorch.py @@ -14,7 +14,6 @@ import torchvision import torchvision.transforms as transforms import ray from ray import tune -from ray.tune import CLIReporter from ray.tune.schedulers import ASHAScheduler # __import_end__ @@ -187,21 +186,18 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2): "batch_size": tune.choice([2, 4, 8, 16]) } scheduler = ASHAScheduler( - metric="loss", - mode="min", max_t=max_num_epochs, grace_period=1, reduction_factor=2) - reporter = CLIReporter( - # parameter_columns=["l1", "l2", "lr", "batch_size"], - metric_columns=["loss", "accuracy", "training_iteration"]) result = tune.run( - partial(train_cifar, data_dir=data_dir), + tune.with_parameters(train_cifar, data_dir=data_dir), resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, config=config, + metric="loss", + mode="min", num_samples=num_samples, - scheduler=scheduler, - progress_reporter=reporter) + scheduler=scheduler + ) best_trial = result.get_best_trial("loss", "min", "last") print("Best trial config: {}".format(best_trial.config)) diff --git a/python/ray/tune/examples/ddp_mnist_torch.py b/python/ray/tune/examples/ddp_mnist_torch.py index edb0694f6..889e1299f 100644 --- a/python/ray/tune/examples/ddp_mnist_torch.py +++ b/python/ray/tune/examples/ddp_mnist_torch.py @@ -72,4 +72,11 @@ if __name__ == "__main__": ray.init(**options) trainable_cls = DistributedTrainableCreator( train_mnist, num_workers=args.num_workers, use_gpu=args.use_gpu) - tune.run(trainable_cls, num_samples=4, stop={"training_iteration": 10}) + analysis = tune.run( + 
trainable_cls, + num_samples=4, + stop={"training_iteration": 10}, + metric="mean_accuracy", + mode="max") + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/dragonfly_example.py b/python/ray/tune/examples/dragonfly_example.py index 38c83083e..70f9f59e0 100644 --- a/python/ray/tune/examples/dragonfly_example.py +++ b/python/ray/tune/examples/dragonfly_example.py @@ -1,4 +1,4 @@ -"""This test checks that Dragonfly is functional. +"""This example demonstrates the usage of Dragonfly with Ray Tune. It also checks that it is usable with a separate scheduler. """ @@ -9,7 +9,6 @@ from __future__ import print_function import numpy as np import time -import ray from ray import tune from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler @@ -37,17 +36,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 50, - "config": { - "iterations": 100, - "LiNO3_vol": tune.uniform(0, 7), - "Li2SO4_vol": tune.uniform(0, 7), - "NaClO4_vol": tune.uniform(0, 7) - }, - } # Optional: Pass the parameter space yourself # space = [{ @@ -75,11 +63,20 @@ if __name__ == "__main__": df_search = ConcurrencyLimiter(df_search, max_concurrent=4) scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( objective, metric="objective", mode="max", name="dragonfly_search", search_alg=df_search, scheduler=scheduler, - **tune_kwargs) + num_samples=10 if args.smoke_test else 50, + config={ + "iterations": 100, + "LiNO3_vol": tune.uniform(0, 7), + "Li2SO4_vol": tune.uniform(0, 7), + "NaClO4_vol": tune.uniform(0, 7) + }, + ) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/genetic_example.py b/python/ray/tune/examples/genetic_example.py index 
cc566eca9..222539328 100644 --- a/python/ray/tune/examples/genetic_example.py +++ b/python/ray/tune/examples/genetic_example.py @@ -1,8 +1,7 @@ -"""This test checks that GeneticSearch is functional. +"""This example demonstrates the usage of GeneticSearch with Ray Tune. It also checks that it is usable with a separate scheduler. """ -import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.automl import GeneticSearch @@ -30,7 +29,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() space = SearchSpace({ ContinuousSpace("x1", 0, 4, 100), @@ -40,16 +38,19 @@ if __name__ == "__main__": DiscreteSpace("x5", [-1, 0, 1, 2, 3]), }) - config = {"stop": {"training_iteration": 100}} algo = GeneticSearch( space, reward_attr="neg_mean_loss", max_generation=2 if args.smoke_test else 10, population_size=10 if args.smoke_test else 50) - scheduler = AsyncHyperBandScheduler(metric="neg_mean_loss", mode="max") - tune.run( + scheduler = AsyncHyperBandScheduler() + analysis = tune.run( michalewicz_function, + metric="neg_mean_loss", + mode="max", name="my_exp", search_alg=algo, scheduler=scheduler, - **config) + stop={"training_iteration": 100}) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/horovod_simple.py b/python/ray/tune/examples/horovod_simple.py index f2a141529..ef956fca0 100644 --- a/python/ray/tune/examples/horovod_simple.py +++ b/python/ray/tune/examples/horovod_simple.py @@ -112,7 +112,9 @@ if __name__ == "__main__": replicate_pem=False) analysis = tune.run( horovod_trainable, + metric="loss", + mode="min", config={"lr": tune.uniform(0.1, 1)}, num_samples=2 if args.smoke_test else 10, fail_fast=True) - config = analysis.get_best_config(metric="loss", mode="min") + print("Best hyperparameters found were: ", analysis.best_config) diff --git 
a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index c2aff71ae..ded30970d 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -51,16 +51,14 @@ if __name__ == "__main__": # Hyperband early stopping, configured with `episode_reward_mean` as the # objective and `training_iteration` as the time unit, # which is automatically filled by Tune. - hyperband = HyperBandScheduler( - time_attr="training_iteration", - metric="episode_reward_mean", - mode="max", - max_t=200) + hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200) - tune.run( + analysis = tune.run( MyTrainableClass, name="hyperband_test", num_samples=20, + metric="episode_reward_mean", + mode="max", stop={"training_iteration": 1 if args.smoke_test else 99999}, config={ "width": tune.randint(10, 90), @@ -68,3 +66,5 @@ if __name__ == "__main__": }, scheduler=hyperband, fail_fast=True) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/hyperband_function_example.py b/python/ray/tune/examples/hyperband_function_example.py index 2492229fd..5ca7cabf6 100644 --- a/python/ray/tune/examples/hyperband_function_example.py +++ b/python/ray/tune/examples/hyperband_function_example.py @@ -21,6 +21,8 @@ def train(config, checkpoint_dir=None): v = np.tanh(float(timestep) / config.get("width", 1)) v *= config.get("height", 1) + # Checkpoint the state of the training every 3 steps + # Note that this is only required for certain schedulers if timestep % 3 == 0: with tune.checkpoint_dir(step=timestep) as checkpoint_dir: path = os.path.join(checkpoint_dir, "checkpoint") @@ -42,17 +44,16 @@ if __name__ == "__main__": # Hyperband early stopping, configured with `episode_reward_mean` as the # objective and `training_iteration` as the time unit, # which is automatically filled by Tune. 
- hyperband = HyperBandScheduler( - time_attr="training_iteration", - metric="episode_reward_mean", - mode="max", - max_t=200) + hyperband = HyperBandScheduler(max_t=200) - tune.run( + analysis = tune.run( train, name="hyperband_test", num_samples=20, + metric="episode_reward_mean", + mode="max", stop={"training_iteration": 10 if args.smoke_test else 99999}, config={"height": tune.uniform(0, 100)}, scheduler=hyperband, fail_fast=True) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py index d28f059ec..00afe29db 100644 --- a/python/ray/tune/examples/hyperopt_example.py +++ b/python/ray/tune/examples/hyperopt_example.py @@ -1,4 +1,4 @@ -"""This test checks that HyperOpt is functional. +"""This example demonstrates the usage of HyperOpt with Ray Tune. It also checks that it is usable with a separate scheduler. """ @@ -12,6 +12,7 @@ from ray.tune.suggest.hyperopt import HyperOptSearch def evaluation_fn(step, width, height): + time.sleep(0.1) return (0.1 + width * step / 100)**(-1) + height * 0.1 @@ -24,7 +25,6 @@ def easy_objective(config): intermediate_score = evaluation_fn(step, width, height) # Feed the score back back to Tune. tune.report(iterations=step, mean_loss=intermediate_score) - time.sleep(0.1) if __name__ == "__main__": @@ -49,24 +49,23 @@ if __name__ == "__main__": } ] - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 1000, - "config": { - "steps": 100, - "width": tune.uniform(0, 20), - "height": tune.uniform(-100, 100), - # This is an ignored parameter. 
- "activation": tune.choice(["relu", "tanh"]) - } - } algo = HyperOptSearch(points_to_evaluate=current_best_params) algo = ConcurrencyLimiter(algo, max_concurrent=4) scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, search_alg=algo, scheduler=scheduler, metric="mean_loss", mode="min", - **tune_kwargs) + num_samples=10 if args.smoke_test else 1000, + config={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + # This is an ignored parameter. + "activation": tune.choice(["relu", "tanh"]) + }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/lightgbm_example.py b/python/ray/tune/examples/lightgbm_example.py index 9ca41fa8e..16e62e090 100644 --- a/python/ray/tune/examples/lightgbm_example.py +++ b/python/ray/tune/examples/lightgbm_example.py @@ -5,6 +5,7 @@ import sklearn.metrics from sklearn.model_selection import train_test_split from ray import tune +from ray.tune.schedulers import ASHAScheduler def LightGBMCallback(env): @@ -41,11 +42,13 @@ if __name__ == "__main__": "num_leaves": tune.randint(10, 1000), "learning_rate": tune.loguniform(1e-8, 1e-1) } - from ray.tune.schedulers import ASHAScheduler - tune.run( + + analysis = tune.run( train_breast_cancer, metric="binary_error", mode="min", config=config, num_samples=2, scheduler=ASHAScheduler()) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/logging_example.py b/python/ray/tune/examples/logging_example.py index c0bf7eda4..53fede738 100755 --- a/python/ray/tune/examples/logging_example.py +++ b/python/ray/tune/examples/logging_example.py @@ -1,12 +1,9 @@ #!/usr/bin/env python import argparse -import json -import os -import numpy as np +import time from ray import tune -from ray.tune import Trainable, run class TestLogger(tune.logger.Logger): @@ -18,34 +15,20 @@ def trial_str_creator(trial): return 
"{}_{}_123".format(trial.trainable_name, trial.trial_id) -class MyTrainableClass(Trainable): - """Example agent whose learning curve is a random sigmoid. +def evaluation_fn(step, width, height): + time.sleep(0.1) + return (0.1 + width * step / 100)**(-1) + height * 0.1 - The dummy hyperparameters "width" and "height" determine the slope and - maximum reward value reached. - """ - def setup(self, config): - self.timestep = 0 +def easy_objective(config): + # Hyperparameters + width, height = config["width"], config["height"] - def step(self): - self.timestep += 1 - v = np.tanh(float(self.timestep) / self.config.get("width", 1)) - v *= self.config.get("height", 1) - - # Here we use `episode_reward_mean`, but you can also report other - # objectives such as loss or accuracy. - return {"episode_reward_mean": v} - - def save_checkpoint(self, checkpoint_dir): - path = os.path.join(checkpoint_dir, "checkpoint") - with open(path, "w") as f: - f.write(json.dumps({"timestep": self.timestep})) - return path - - def load_checkpoint(self, checkpoint_path): - with open(checkpoint_path) as f: - self.timestep = json.loads(f.read())["timestep"] + for step in range(config["steps"]): + # Iterative training function - can be any arbitrary training procedure + intermediate_score = evaluation_fn(step, width, height) + # Feed the score back back to Tune. 
+ tune.report(iterations=step, mean_loss=intermediate_score) if __name__ == "__main__": @@ -54,14 +37,18 @@ if __name__ == "__main__": "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - trials = run( - MyTrainableClass, + analysis = tune.run( + easy_objective, name="hyperband_test", + metric="mean_loss", + mode="min", num_samples=5, trial_name_creator=trial_str_creator, loggers=[TestLogger], stop={"training_iteration": 1 if args.smoke_test else 99999}, config={ + "steps": 100, "width": tune.randint(10, 100), "height": tune.loguniform(10, 100) }) + print("Best hyperparameters: ", analysis.best_config) diff --git a/python/ray/tune/examples/mnist_ptl_mini.py b/python/ray/tune/examples/mnist_ptl_mini.py index 8eb22b3fd..b1c2e2aa9 100644 --- a/python/ray/tune/examples/mnist_ptl_mini.py +++ b/python/ray/tune/examples/mnist_ptl_mini.py @@ -90,7 +90,7 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0): data_dir=data_dir, num_epochs=num_epochs, num_gpus=gpus_per_trial) - tune.run( + analysis = tune.run( trainable, resources_per_trial={ "cpu": 1, @@ -102,6 +102,8 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0): num_samples=num_samples, name="tune_mnist") + print("Best hyperparameters found were: ", analysis.best_config) + if __name__ == "__main__": import argparse diff --git a/python/ray/tune/examples/mnist_pytorch_lightning.py b/python/ray/tune/examples/mnist_pytorch_lightning.py index c155aee42..0811200b9 100644 --- a/python/ray/tune/examples/mnist_pytorch_lightning.py +++ b/python/ray/tune/examples/mnist_pytorch_lightning.py @@ -93,7 +93,6 @@ class LightningMNISTClassifier(pl.LightningModule): self.log("ptl/val_loss", avg_loss) self.log("ptl/val_accuracy", avg_acc) - @staticmethod def download_data(data_dir): transform = transforms.Compose([ @@ -177,7 +176,8 @@ def train_mnist_tune_checkpoint(config, ckpt = pl_load( os.path.join(checkpoint_dir, "checkpoint"), map_location=lambda 
storage, loc: storage) - model = LightningMNISTClassifier._load_model_state(ckpt, config=config, data_dir=data_dir) + model = LightningMNISTClassifier._load_model_state( + ckpt, config=config, data_dir=data_dir) trainer.current_epoch = ckpt["epoch"] else: model = LightningMNISTClassifier(config=config, data_dir=data_dir) @@ -199,8 +199,6 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0): } scheduler = ASHAScheduler( - metric="loss", - mode="min", max_t=num_epochs, grace_period=1, reduction_factor=2) @@ -209,7 +207,7 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0): parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"], metric_columns=["loss", "mean_accuracy", "training_iteration"]) - tune.run( + analysis = tune.run( tune.with_parameters( train_mnist_tune, data_dir=data_dir, @@ -219,12 +217,16 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0): "cpu": 1, "gpu": gpus_per_trial }, + metric="loss", + mode="min", config=config, num_samples=num_samples, scheduler=scheduler, progress_reporter=reporter, name="tune_mnist_asha") + print("Best hyperparameters found were: ", analysis.best_config) + shutil.rmtree(data_dir) # __tune_asha_end__ @@ -242,9 +244,6 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0): } scheduler = PopulationBasedTraining( - time_attr="training_iteration", - metric="loss", - mode="min", perturbation_interval=4, hyperparam_mutations={ "lr": tune.loguniform(1e-4, 1e-1), @@ -255,7 +254,7 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0): parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"], metric_columns=["loss", "mean_accuracy", "training_iteration"]) - tune.run( + analysis = tune.run( tune.with_parameters( train_mnist_tune_checkpoint, data_dir=data_dir, @@ -265,12 +264,16 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0): "cpu": 1, "gpu": gpus_per_trial }, + metric="loss", + mode="min", 
config=config, num_samples=num_samples, scheduler=scheduler, progress_reporter=reporter, name="tune_mnist_pbt") + print("Best hyperparameters found were: ", analysis.best_config) + shutil.rmtree(data_dir) # __tune_pbt_end__ diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py index c31b81968..345a69877 100644 --- a/python/ray/tune/examples/mnist_pytorch_trainable.py +++ b/python/ray/tune/examples/mnist_pytorch_trainable.py @@ -88,5 +88,4 @@ if __name__ == "__main__": "momentum": tune.uniform(0.1, 0.9), }) - print("Best config is:", - analysis.get_best_config(metric="mean_accuracy", mode="max")) + print("Best config is:", analysis.best_config) diff --git a/python/ray/tune/examples/mxnet_example.py b/python/ray/tune/examples/mxnet_example.py index dd959e481..a8639c618 100644 --- a/python/ray/tune/examples/mxnet_example.py +++ b/python/ray/tune/examples/mxnet_example.py @@ -1,8 +1,6 @@ -from functools import partial - import mxnet as mx + from ray import tune, logger -from ray.tune import CLIReporter from ray.tune.integration.mxnet import TuneCheckpointCallback, \ TuneReportCallback from ray.tune.schedulers import ASHAScheduler @@ -59,25 +57,21 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10): } scheduler = ASHAScheduler( - metric="mean_accuracy", - mode="max", - max_t=num_epochs, - grace_period=1, - reduction_factor=2) + max_t=num_epochs, grace_period=1, reduction_factor=2) - reporter = CLIReporter( - parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"]) - - tune.run( - partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs), + analysis = tune.run( + tune.with_parameters( + train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs), resources_per_trial={ "cpu": 1, }, + metric="mean_accuracy", + mode="max", config=config, num_samples=num_samples, scheduler=scheduler, - progress_reporter=reporter, name="tune_mnist_mxnet") + return analysis if __name__ == "__main__": 
@@ -89,6 +83,8 @@ if __name__ == "__main__": args, _ = parser.parse_known_args() if args.smoke_test: - tune_mnist_mxnet(num_samples=1, num_epochs=1) + analysis = tune_mnist_mxnet(num_samples=1, num_epochs=1) else: - tune_mnist_mxnet(num_samples=10, num_epochs=10) + analysis = tune_mnist_mxnet(num_samples=10, num_epochs=10) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/nevergrad_example.py b/python/ray/tune/examples/nevergrad_example.py index 7eae59bc1..f16351faa 100644 --- a/python/ray/tune/examples/nevergrad_example.py +++ b/python/ray/tune/examples/nevergrad_example.py @@ -1,10 +1,9 @@ -"""This test checks that Nevergrad is functional. +"""This example demonstrates the usage of Nevergrad with Ray Tune. It also checks that it is usable with a separate scheduler. """ import time -import ray from ray import tune from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler @@ -35,18 +34,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - - # The config will be automatically converted to Nevergrad's search space - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 50, - "config": { - "steps": 100, - "width": tune.uniform(0, 20), - "height": tune.uniform(-100, 100), - "activation": tune.choice(["relu", "tanh"]) - } - } # Optional: Pass the parameter space yourself # space = ng.p.Dict( @@ -63,11 +50,19 @@ if __name__ == "__main__": scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, metric="mean_loss", mode="min", name="nevergrad", search_alg=algo, scheduler=scheduler, - **tune_kwargs) + num_samples=10 if args.smoke_test else 50, + config={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.choice(["relu", "tanh"]) + }) + + print("Best 
hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/optuna_example.py b/python/ray/tune/examples/optuna_example.py index ab7e68d38..f7a9dd88e 100644 --- a/python/ray/tune/examples/optuna_example.py +++ b/python/ray/tune/examples/optuna_example.py @@ -1,4 +1,4 @@ -"""This test checks that Optuna is functional. +"""This example demonstrates the usage of Optuna with Ray Tune. It also checks that it is usable with a separate scheduler. """ @@ -36,23 +36,22 @@ if __name__ == "__main__": args, _ = parser.parse_known_args() ray.init(configure_logging=False) - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 100, - "config": { - "steps": 100, - "width": tune.uniform(0, 20), - "height": tune.uniform(-100, 100), - # This is an ignored parameter. - "activation": tune.choice(["relu", "tanh"]) - } - } algo = OptunaSearch() algo = ConcurrencyLimiter(algo, max_concurrent=4) scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, metric="mean_loss", mode="min", search_alg=algo, scheduler=scheduler, - **tune_kwargs) + num_samples=10 if args.smoke_test else 100, + config={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + # This is an ignored parameter. + "activation": tune.choice(["relu", "tanh"]) + }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/pb2_example.py b/python/ray/tune/examples/pb2_example.py index 1d6ae7d77..29e08d50b 100644 --- a/python/ray/tune/examples/pb2_example.py +++ b/python/ray/tune/examples/pb2_example.py @@ -18,19 +18,18 @@ if __name__ == "__main__": ray.init() pbt = PB2( - time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=20, hyperparam_bounds={ # hyperparameter bounds. 
"lr": [0.0001, 0.02], }) - tune.run( + analysis = tune.run( pbt_function, name="pbt_test", scheduler=pbt, + metric="mean_accuracy", + mode="max", verbose=False, stop={ "training_iteration": 30, @@ -43,3 +42,5 @@ if __name__ == "__main__": # the model training in this example "some_other_factor": 1, }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/pb2_ppo_example.py b/python/ray/tune/examples/pb2_ppo_example.py index f93f8b645..063194c97 100644 --- a/python/ray/tune/examples/pb2_ppo_example.py +++ b/python/ray/tune/examples/pb2_ppo_example.py @@ -4,7 +4,6 @@ import argparse import pandas as pd from datetime import datetime -import ray from ray.tune import run, sample_from from ray.tune.schedulers import PopulationBasedTraining from ray.tune.schedulers.pb2 import PB2 @@ -46,7 +45,6 @@ if __name__ == "__main__": parser.add_argument("--save_csv", type=bool, default=False) args = parser.parse_args() - ray.init() # bipedalwalker needs 1600 if args.env_name in ["BipedalWalker-v2", "BipedalWalker-v3"]: diff --git a/python/ray/tune/examples/pbt_convnet_example.py b/python/ray/tune/examples/pbt_convnet_example.py index f54f036b9..ee62a044a 100644 --- a/python/ray/tune/examples/pbt_convnet_example.py +++ b/python/ray/tune/examples/pbt_convnet_example.py @@ -86,8 +86,6 @@ if __name__ == "__main__": # __pbt_begin__ scheduler = PopulationBasedTraining( time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=5, hyperparam_mutations={ # distribution for resampling @@ -118,6 +116,8 @@ if __name__ == "__main__": name="pbt_test", scheduler=scheduler, reuse_actors=True, + metric="mean_accuracy", + mode="max", verbose=1, stop=stopper, export_formats=[ExportFormat.MODEL], @@ -131,9 +131,8 @@ if __name__ == "__main__": }) # __tune_end__ - best_trial = analysis.get_best_trial("mean_accuracy", "max") - best_checkpoint = analysis.get_best_checkpoint( - best_trial, metric="mean_accuracy", 
mode="max") + best_trial = analysis.best_trial + best_checkpoint = analysis.best_checkpoint restored_trainable = PytorchTrainable() restored_trainable.restore(best_checkpoint) best_model = restored_trainable.model diff --git a/python/ray/tune/examples/pbt_convnet_function_example.py b/python/ray/tune/examples/pbt_convnet_function_example.py index a607bed12..dd4488d09 100644 --- a/python/ray/tune/examples/pbt_convnet_function_example.py +++ b/python/ray/tune/examples/pbt_convnet_function_example.py @@ -10,7 +10,6 @@ from torchvision import datasets from ray.tune.examples.mnist_pytorch import train, test, ConvNet,\ get_data_loaders -import ray from ray import tune from ray.tune.schedulers import PopulationBasedTraining from ray.tune.trial import ExportFormat @@ -66,14 +65,11 @@ if __name__ == "__main__": "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() datasets.MNIST("~/data", train=True, download=True) # __pbt_begin__ scheduler = PopulationBasedTraining( time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=5, hyperparam_mutations={ # distribution for resampling @@ -104,6 +100,8 @@ if __name__ == "__main__": train_convnet, name="pbt_test", scheduler=scheduler, + metric="mean_accuracy", + mode="max", verbose=1, stop=stopper, export_formats=[ExportFormat.MODEL], @@ -116,9 +114,8 @@ if __name__ == "__main__": }) # __tune_end__ - best_trial = analysis.get_best_trial("mean_accuracy", mode="max") - best_checkpoint_path = analysis.get_best_checkpoint( - best_trial, metric="mean_accuracy", mode="max") + best_trial = analysis.best_trial + best_checkpoint_path = analysis.best_checkpoint best_model = ConvNet() best_checkpoint = torch.load( os.path.join(best_checkpoint_path, "checkpoint")) diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_func.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_func.py index 77d8b376c..53a06d43c 100644 
--- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_func.py +++ b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_func.py @@ -9,7 +9,6 @@ from ray.tune.schedulers import PopulationBasedTraining import argparse import os from filelock import FileLock -import random import torch import torch.nn as nn import torch.nn.parallel @@ -105,9 +104,6 @@ if __name__ == "__main__": mnist_model_ref = ray.put(mnist_cnn) scheduler = PopulationBasedTraining( - time_attr="training_iteration", - metric="is_score", - mode="max", perturbation_interval=5, hyperparam_mutations={ # distribution for resampling @@ -124,12 +120,12 @@ if __name__ == "__main__": stop={ "training_iteration": tune_iter, }, + metric="is_score", + mode="max", num_samples=8, config={ - "netG_lr": tune.sample_from( - lambda spec: random.choice([0.0001, 0.0002, 0.0005])), - "netD_lr": tune.sample_from( - lambda spec: random.choice([0.0001, 0.0002, 0.0005])), + "netG_lr": tune.choice([0.0001, 0.0002, 0.0005]), + "netD_lr": tune.choice([0.0001, 0.0002, 0.0005]), "mnist_model_ref": mnist_model_ref }) # __tune_end__ diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py index 8dea4fbcd..9841dd5e1 100644 --- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py +++ b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py @@ -127,8 +127,6 @@ if __name__ == "__main__": # __tune_begin__ scheduler = PopulationBasedTraining( time_attr="training_iteration", - metric="is_score", - mode="max", perturbation_interval=5, hyperparam_mutations={ # distribution for resampling @@ -143,6 +141,8 @@ if __name__ == "__main__": scheduler=scheduler, reuse_actors=True, verbose=1, + metric="is_score", + mode="max", checkpoint_at_end=True, stop={ "training_iteration": tune_iter, diff --git a/python/ray/tune/examples/pbt_example.py b/python/ray/tune/examples/pbt_example.py index 
90901e74a..768d05ec2 100755 --- a/python/ray/tune/examples/pbt_example.py +++ b/python/ray/tune/examples/pbt_example.py @@ -5,11 +5,11 @@ import argparse import random import ray -from ray.tune import Trainable, run +from ray import tune from ray.tune.schedulers import PopulationBasedTraining -class PBTBenchmarkExample(Trainable): +class PBTBenchmarkExample(tune.Trainable): """Toy PBT problem for benchmarking adaptive learning rate. The goal is to optimize this trainable's accuracy. The accuracy increases @@ -93,8 +93,6 @@ if __name__ == "__main__": pbt = PopulationBasedTraining( time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=20, hyperparam_mutations={ # distribution for resampling @@ -103,10 +101,12 @@ if __name__ == "__main__": "some_other_factor": [1, 2], }) - run( + analysis = tune.run( PBTBenchmarkExample, name="pbt_test", scheduler=pbt, + metric="mean_accuracy", + mode="max", reuse_actors=True, checkpoint_freq=20, verbose=False, @@ -120,3 +120,5 @@ if __name__ == "__main__": # the model training in this example "some_other_factor": 1, }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/pbt_function.py b/python/ray/tune/examples/pbt_function.py index 69c8d2570..59755be52 100644 --- a/python/ray/tune/examples/pbt_function.py +++ b/python/ray/tune/examples/pbt_function.py @@ -75,7 +75,8 @@ def pbt_function(config, checkpoint_dir=None): cur_lr=lr, optimal_lr=optimal_lr, # for debugging q_err=q_err, # for debugging - done=accuracy > midpoint * 2) + done=accuracy > midpoint * 2 # this stops the training process + ) if __name__ == "__main__": @@ -90,8 +91,6 @@ if __name__ == "__main__": pbt = PopulationBasedTraining( time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=4, hyperparam_mutations={ # distribution for resampling @@ -100,11 +99,13 @@ if __name__ == "__main__": "some_other_factor": [1, 2], }) - tune.run( + 
analysis = tune.run( pbt_function, name="pbt_test", scheduler=pbt, verbose=False, + metric="mean_accuracy", + mode="max", stop={ "training_iteration": 30, }, @@ -116,3 +117,5 @@ if __name__ == "__main__": # the model training in this example "some_other_factor": 1, }) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/pbt_memnn_example.py b/python/ray/tune/examples/pbt_memnn_example.py index 65ac2cd78..64be97380 100644 --- a/python/ray/tune/examples/pbt_memnn_example.py +++ b/python/ray/tune/examples/pbt_memnn_example.py @@ -268,9 +268,6 @@ if __name__ == "__main__": read_data() pbt = PopulationBasedTraining( - time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=2, hyperparam_mutations={ "dropout": lambda: np.random.uniform(0, 1), @@ -282,6 +279,8 @@ if __name__ == "__main__": MemNNModel, name="pbt_babi_memnn", scheduler=pbt, + metric="mean_accuracy", + mode="max", stop={"training_iteration": 4 if args.smoke_test else 100}, num_samples=2, config={ diff --git a/python/ray/tune/examples/pbt_ppo_example.py b/python/ray/tune/examples/pbt_ppo_example.py index d084f6214..5b665e6c4 100755 --- a/python/ray/tune/examples/pbt_ppo_example.py +++ b/python/ray/tune/examples/pbt_ppo_example.py @@ -11,8 +11,7 @@ computationally demanding example. 
import random -import ray -from ray.tune import run, sample_from +from ray import tune from ray.tune.schedulers import PopulationBasedTraining if __name__ == "__main__": @@ -29,8 +28,6 @@ if __name__ == "__main__": pbt = PopulationBasedTraining( time_attr="time_total_s", - metric="episode_reward_mean", - mode="max", perturbation_interval=120, resample_probability=0.25, # Specifies the mutations of these hyperparams @@ -44,12 +41,13 @@ if __name__ == "__main__": }, custom_explore_fn=explore) - ray.init() - run( + analysis = tune.run( "PPO", name="pbt_humanoid_test", scheduler=pbt, num_samples=8, + metric="episode_reward_mean", + mode="max", config={ "env": "Humanoid-v1", "kl_coeff": 1.0, @@ -63,10 +61,9 @@ if __name__ == "__main__": "clip_param": 0.2, "lr": 1e-4, # These params start off randomly drawn from a set. - "num_sgd_iter": sample_from( - lambda spec: random.choice([10, 20, 30])), - "sgd_minibatch_size": sample_from( - lambda spec: random.choice([128, 512, 2048])), - "train_batch_size": sample_from( - lambda spec: random.choice([10000, 20000, 40000])) + "num_sgd_iter": tune.choice([10, 20, 30]), + "sgd_minibatch_size": tune.choice([128, 512, 2048]), + "train_batch_size": tune.choice([10000, 20000, 40000]) }) + + print("best hyperparameters: ", analysis.best_config) diff --git a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py index e0403895d..c65f5de34 100755 --- a/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py +++ b/python/ray/tune/examples/pbt_tune_cifar10_with_keras.py @@ -22,8 +22,7 @@ from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D from tensorflow.python.keras.models import Model, load_model from tensorflow.python.keras.preprocessing.image import ImageDataGenerator -import ray -from ray.tune import grid_search, run, sample_from +from ray import tune from ray.tune import Trainable from ray.tune.schedulers import PopulationBasedTraining @@ -184,38 +183,39 @@ 
if __name__ == "__main__": "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - train_spec = { - "resources_per_trial": { - "cpu": 1, - "gpu": 1 - }, - "stop": { - "mean_accuracy": 0.80, - "training_iteration": 30, - }, - "config": { - "epochs": 1, - "batch_size": 64, - "lr": grid_search([10**-4, 10**-5]), - "decay": sample_from(lambda spec: spec.config.lr / 100.0), - "dropout": grid_search([0.25, 0.5]), - }, - "num_samples": 4, + space = { + "epochs": 1, + "batch_size": 64, + "lr": tune.grid_search([10**-4, 10**-5]), + "decay": tune.sample_from(lambda spec: spec.config.lr / 100.0), + "dropout": tune.grid_search([0.25, 0.5]), } - if args.smoke_test: - train_spec["config"]["lr"] = 10**-4 - train_spec["config"]["dropout"] = 0.5 - - ray.init() + space["lr"] = 10**-4 + space["dropout"] = 0.5 pbt = PopulationBasedTraining( time_attr="training_iteration", - metric="mean_accuracy", - mode="max", perturbation_interval=10, hyperparam_mutations={ "dropout": lambda _: np.random.uniform(0, 1), }) - run(Cifar10Model, name="pbt_cifar10", scheduler=pbt, **train_spec) + analysis = tune.run( + Cifar10Model, + name="pbt_cifar10", + scheduler=pbt, + resources_per_trial={ + "cpu": 1, + "gpu": 1 + }, + stop={ + "mean_accuracy": 0.80, + "training_iteration": 30, + }, + config=space, + num_samples=4, + metric="mean_accuracy", + mode="max", + ) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/sigopt_example.py b/python/ray/tune/examples/sigopt_example.py index ab8ce8281..dabe38aa1 100644 --- a/python/ray/tune/examples/sigopt_example.py +++ b/python/ray/tune/examples/sigopt_example.py @@ -1,10 +1,9 @@ -"""This test checks that SigOpt is functional. +"""This example demonstrates the usage of SigOpt with Ray Tune. It also checks that it is usable with a separate scheduler. 
""" import time -import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.sigopt import SigOptSearch @@ -37,7 +36,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() space = [ { @@ -57,13 +55,6 @@ if __name__ == "__main__": }, }, ] - - config = { - "num_samples": 10 if args.smoke_test else 1000, - "config": { - "steps": 10 - } - } algo = SigOptSearch( space, name="SigOpt Example Experiment", @@ -71,9 +62,12 @@ if __name__ == "__main__": metric="mean_loss", mode="min") scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") - tune.run( + analysis = tune.run( easy_objective, name="my_exp", search_alg=algo, scheduler=scheduler, - **config) + num_samples=10 if args.smoke_test else 1000, + config={"steps": 10}) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/sigopt_multi_objective_example.py b/python/ray/tune/examples/sigopt_multi_objective_example.py index 4fd495c85..1d34da8cc 100644 --- a/python/ray/tune/examples/sigopt_multi_objective_example.py +++ b/python/ray/tune/examples/sigopt_multi_objective_example.py @@ -1,13 +1,9 @@ -"""This test checks that SigOpt is functional. +"""Example using Sigopt's multi-objective functionality.""" -It also checks that it is usable with a separate scheduler. 
-""" import time -import ray import numpy as np from ray import tune -from ray.tune.schedulers import FIFOScheduler from ray.tune.suggest.sigopt import SigOptSearch np.random.seed(0) @@ -41,7 +37,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() space = [ { @@ -54,13 +49,6 @@ if __name__ == "__main__": }, ] - config = { - "num_samples": 10 if args.smoke_test else 1000, - "config": { - "total_weight": 1 - } - } - algo = SigOptSearch( space, name="SigOpt Example Multi Objective Experiment", @@ -69,11 +57,10 @@ if __name__ == "__main__": metric=["average", "std", "sharpe"], mode=["max", "min", "obs"]) - scheduler = FIFOScheduler() - - tune.run( + analysis = tune.run( easy_objective, name="my_exp", search_alg=algo, - scheduler=scheduler, - **config) + num_samples=10 if args.smoke_test else 1000, + config={"total_weight": 1}) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/sigopt_prior_beliefs_example.py b/python/ray/tune/examples/sigopt_prior_beliefs_example.py index 870efb31f..420a342a8 100644 --- a/python/ray/tune/examples/sigopt_prior_beliefs_example.py +++ b/python/ray/tune/examples/sigopt_prior_beliefs_example.py @@ -1,14 +1,8 @@ -"""This test checks that SigOpt is functional. +""""Example using Sigopt's support for prior beliefs.""" -It also checks that it is usable with a separate scheduler. 
-""" -import time - -import ray import numpy as np from ray import tune -from ray.tune.schedulers import FIFOScheduler from ray.tune.suggest.sigopt import SigOptSearch np.random.seed(0) @@ -36,7 +30,6 @@ def easy_objective(config): average, std = evaluate(w1, w2, w3) tune.report(average=average, std=std) - time.sleep(0.1) if __name__ == "__main__": @@ -51,8 +44,6 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - samples = 10 if args.smoke_test else 1000 conn = Connection(client_token=os.environ["SIGOPT_KEY"]) @@ -90,8 +81,6 @@ if __name__ == "__main__": observation_budget=samples, parallel_bandwidth=1) - config = {"num_samples": samples, "config": {}} - algo = SigOptSearch( connection=conn, experiment_id=experiment.id, @@ -100,11 +89,10 @@ if __name__ == "__main__": metric=["average", "std"], mode=["obs", "min"]) - scheduler = FIFOScheduler() - - tune.run( + analysis = tune.run( easy_objective, name="my_exp", search_alg=algo, - scheduler=scheduler, - **config) + num_samples=samples, + config={}) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py index ec0d891a2..05274c535 100644 --- a/python/ray/tune/examples/skopt_example.py +++ b/python/ray/tune/examples/skopt_example.py @@ -1,10 +1,9 @@ -"""This test checks that Skopt is functional. +"""This example demonstrates the usage of Skopt with Ray Tune. It also checks that it is usable with a separate scheduler. 
""" import time -import ray from ray import tune from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler @@ -12,6 +11,7 @@ from ray.tune.suggest.skopt import SkOptSearch def evaluation_fn(step, width, height): + time.sleep(0.1) return (0.1 + width * step / 100)**(-1) + height * 0.1 @@ -24,7 +24,6 @@ def easy_objective(config): intermediate_score = evaluation_fn(step, width, height) # Feed the score back back to Tune. tune.report(iterations=step, mean_loss=intermediate_score) - time.sleep(0.1) if __name__ == "__main__": @@ -34,18 +33,8 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() # The config will be automatically converted to SkOpt's search space - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 50, - "config": { - "steps": 100, - "width": tune.uniform(0, 20), - "height": tune.uniform(-100, 100), - "activation": tune.choice(["relu", "tanh"]) - } - } # Optional: Pass the parameter space yourself # space = { @@ -66,11 +55,18 @@ if __name__ == "__main__": scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, metric="mean_loss", mode="min", name="skopt_exp_with_warmstart", search_alg=algo, scheduler=scheduler, - **tune_kwargs) + num_samples=10 if args.smoke_test else 50, + config={ + "steps": 100, + "width": tune.uniform(0, 20), + "height": tune.uniform(-100, 100), + "activation": tune.choice(["relu", "tanh"]) + }) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/tf_distributed_keras_example.py b/python/ray/tune/examples/tf_distributed_keras_example.py index ff8d8b8d4..643bb52ff 100644 --- a/python/ray/tune/examples/tf_distributed_keras_example.py +++ b/python/ray/tune/examples/tf_distributed_keras_example.py @@ -86,16 +86,15 @@ if __name__ == "__main__": use_gpu=args.use_gpu, num_workers=2, ) - 
sched = AsyncHyperBandScheduler( - time_attr="training_iteration", - metric="mean_accuracy", - mode="max", - max_t=400, - grace_period=20) - tune.run( + + sched = AsyncHyperBandScheduler(max_t=400, grace_period=20) + + analysis = tune.run( tf_trainable, name="exp", scheduler=sched, + metric="mean_accuracy", + mode="max", stop={ "mean_accuracy": 0.99, "training_iteration": 10 @@ -108,3 +107,4 @@ if __name__ == "__main__": "hidden": tune.sample_from( lambda spec: np.random.randint(32, 512)), }) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/tf_mnist_example.py b/python/ray/tune/examples/tf_mnist_example.py index 054df5ceb..95f18809e 100644 --- a/python/ray/tune/examples/tf_mnist_example.py +++ b/python/ray/tune/examples/tf_mnist_example.py @@ -116,8 +116,12 @@ class MNISTTrainable(tune.Trainable): if __name__ == "__main__": load_data() # we download data on the driver to avoid race conditions. - tune.run( + analysis = tune.run( MNISTTrainable, + metric="test_loss", + mode="min", stop={"training_iteration": 5 if args.smoke_test else 50}, verbose=1, config={"hiddens": tune.grid_search([32, 64, 128])}) + + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/tune_cifar10_gluon.py b/python/ray/tune/examples/tune_cifar10_gluon.py index ac0fa90d5..5ba8de5bb 100644 --- a/python/ray/tune/examples/tune_cifar10_gluon.py +++ b/python/ray/tune/examples/tune_cifar10_gluon.py @@ -13,6 +13,7 @@ from mxnet.gluon.data.vision import transforms from gluoncv.model_zoo import get_model from gluoncv.data import transforms as gcv_transforms +from ray.tune.schedulers import create_scheduler from ray import tune # Training settings @@ -185,19 +186,9 @@ def train_cifar10(config): if __name__ == "__main__": args = parser.parse_args() + sched = create_scheduler(args.scheduler) - import ray - from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler - - ray.init() - if 
args.scheduler == "fifo": - sched = FIFOScheduler() - elif args.scheduler == "asynchyperband": - sched = AsyncHyperBandScheduler( - metric="mean_loss", mode="min", max_t=400, grace_period=60) - else: - raise NotImplementedError - tune.run( + analysis = tune.run( train_cifar10, name=args.expname, verbose=2, @@ -213,8 +204,7 @@ if __name__ == "__main__": num_samples=1 if args.smoke_test else args.num_samples, config={ "args": args, - "lr": tune.sample_from( - lambda spec: np.power(10.0, np.random.uniform(-4, -1))), - "momentum": tune.sample_from( - lambda spec: np.random.uniform(0.85, 0.95)), + "lr": tune.loguniform(1e-4, 1e-1), + "momentum": tune.uniform(0.85, 0.95), }) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/tune_mnist_keras.py b/python/ray/tune/examples/tune_mnist_keras.py index 3eac52b4a..3af1aa2dc 100644 --- a/python/ray/tune/examples/tune_mnist_keras.py +++ b/python/ray/tune/examples/tune_mnist_keras.py @@ -1,5 +1,4 @@ import argparse -import numpy as np from tensorflow.keras.datasets import mnist from ray.tune.integration.keras import TuneReportCallback @@ -52,16 +51,14 @@ if __name__ == "__main__": ray.init(num_cpus=4 if args.smoke_test else None) sched = AsyncHyperBandScheduler( - time_attr="training_iteration", - metric="mean_accuracy", - mode="max", - max_t=400, - grace_period=20) + time_attr="training_iteration", max_t=400, grace_period=20) - tune.run( + analysis = tune.run( train_mnist, name="exp", scheduler=sched, + metric="mean_accuracy", + mode="max", stop={ "mean_accuracy": 0.99, "training_iteration": 5 if args.smoke_test else 300 @@ -73,9 +70,8 @@ if __name__ == "__main__": }, config={ "threads": 2, - "lr": tune.sample_from(lambda spec: np.random.uniform(0.001, 0.1)), - "momentum": tune.sample_from( - lambda spec: np.random.uniform(0.1, 0.9)), - "hidden": tune.sample_from( - lambda spec: np.random.randint(32, 512)), + "lr": tune.uniform(0.001, 0.1), + "momentum": tune.uniform(0.1, 
0.9), + "hidden": tune.randint(32, 512), }) + print("Best hyperparameters found were: ", analysis.best_config) diff --git a/python/ray/tune/examples/wandb_example.py b/python/ray/tune/examples/wandb_example.py index 526a7b1c5..acd89f9d8 100644 --- a/python/ray/tune/examples/wandb_example.py +++ b/python/ray/tune/examples/wandb_example.py @@ -20,8 +20,10 @@ def train_function(config, checkpoint_dir=None): def tune_function(api_key_file): """Example for using a WandbLogger with the function API""" - tune.run( + analysis = tune.run( train_function, + metric="loss", + mode="min", config={ "mean": tune.grid_search([1, 2, 3, 4, 5]), "sd": tune.uniform(0.2, 0.8), @@ -31,6 +33,7 @@ def tune_function(api_key_file): } }, loggers=DEFAULT_LOGGERS + (WandbLogger, )) + return analysis.best_config @wandb_mixin @@ -43,8 +46,10 @@ def decorated_train_function(config, checkpoint_dir=None): def tune_decorated(api_key_file): """Example for using the @wandb_mixin decorator with the function API""" - tune.run( + analysis = tune.run( decorated_train_function, + metric="loss", + mode="min", config={ "mean": tune.grid_search([1, 2, 3, 4, 5]), "sd": tune.uniform(0.2, 0.8), @@ -53,6 +58,7 @@ def tune_decorated(api_key_file): "project": "Wandb_example" } }) + return analysis.best_config class WandbTrainable(WandbTrainableMixin, Trainable): @@ -65,8 +71,10 @@ class WandbTrainable(WandbTrainableMixin, Trainable): def tune_trainable(api_key_file): """Example for using a WandTrainableMixin with the class API""" - tune.run( + analysis = tune.run( WandbTrainable, + metric="loss", + mode="min", config={ "mean": tune.grid_search([1, 2, 3, 4, 5]), "sd": tune.uniform(0.2, 0.8), @@ -75,6 +83,7 @@ def tune_trainable(api_key_file): "project": "Wandb_example" } }) + return analysis.best_config if __name__ == "__main__": diff --git a/python/ray/tune/examples/xgboost_example.py b/python/ray/tune/examples/xgboost_example.py index 285734722..dc612b823 100644 --- a/python/ray/tune/examples/xgboost_example.py 
+++ b/python/ray/tune/examples/xgboost_example.py @@ -1,5 +1,6 @@ import sklearn.datasets import sklearn.metrics +import os from ray.tune.schedulers import ASHAScheduler from sklearn.model_selection import train_test_split import xgboost as xgb @@ -8,7 +9,8 @@ from ray import tune from ray.tune.integration.xgboost import TuneReportCheckpointCallback -def train_breast_cancer(config): +def train_breast_cancer(config: dict): + # This is a simple training function to be passed into Tune # Load dataset data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True) # Split into train and test set @@ -17,7 +19,7 @@ def train_breast_cancer(config): # Build input matrices for XGBoost train_set = xgb.DMatrix(train_x, label=train_y) test_set = xgb.DMatrix(test_x, label=test_y) - # Train the classifier + # Train the classifier, using the Tune callback xgb.train( config, train_set, @@ -27,7 +29,8 @@ def train_breast_cancer(config): if __name__ == "__main__": - config = { + search_space = { + # You can mix constants with search space objects. "objective": "binary:logistic", "eval_metric": ["logloss", "error"], "max_depth": tune.randint(1, 9), @@ -35,6 +38,7 @@ if __name__ == "__main__": "subsample": tune.uniform(0.5, 1.0), "eta": tune.loguniform(1e-4, 1e-1) } + # This will enable aggressive early stopping of bad trials. scheduler = ASHAScheduler( max_t=10, # 10 training iterations grace_period=1, @@ -44,13 +48,13 @@ if __name__ == "__main__": train_breast_cancer, metric="eval-logloss", mode="min", - resources_per_trial={"cpu": 1}, # You can add "gpu": 0.1 here - config=config, + # You can add "gpu": 0.1 to allocate GPUs + resources_per_trial={"cpu": 1}, + config=search_space, num_samples=10, scheduler=scheduler) # Load the best model checkpoint - import os best_bst = xgb.Booster() best_bst.load_model(os.path.join(analysis.best_checkpoint, "model.xgb")) accuracy = 1. 
- analysis.best_result["eval-error"] diff --git a/python/ray/tune/examples/zoopt_example.py b/python/ray/tune/examples/zoopt_example.py index 53da3c5e3..f704d3fbb 100644 --- a/python/ray/tune/examples/zoopt_example.py +++ b/python/ray/tune/examples/zoopt_example.py @@ -1,10 +1,9 @@ -"""This test checks that ZOOptSearch is functional. +"""This example demonstrates the usage of ZOOptSearch. It also checks that it is usable with a separate scheduler. """ import time -import ray from ray import tune from ray.tune.suggest.zoopt import ZOOptSearch from ray.tune.schedulers import AsyncHyperBandScheduler @@ -12,6 +11,7 @@ from zoopt import ValueType # noqa: F401 def evaluation_fn(step, width, height): + time.sleep(0.1) return (0.1 + width * step / 100)**(-1) + height * 0.1 @@ -24,7 +24,6 @@ def easy_objective(config): intermediate_score = evaluation_fn(step, width, height) # Feed the score back back to Tune. tune.report(iterations=step, mean_loss=intermediate_score) - time.sleep(0.1) if __name__ == "__main__": @@ -34,16 +33,8 @@ if __name__ == "__main__": parser.add_argument( "--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() - ray.init() - tune_kwargs = { - "num_samples": 10 if args.smoke_test else 1000, - "config": { - "steps": 10, - "height": tune.quniform(-10, 10, 1e-2), - "width": tune.randint(0, 10) - } - } + num_samples = 10 if args.smoke_test else 1000 # Optional: Pass the parameter space yourself # space = { @@ -61,17 +52,23 @@ if __name__ == "__main__": zoopt_search = ZOOptSearch( algo="Asracos", # only support ASRacos currently - budget=tune_kwargs["num_samples"], + budget=num_samples, # dim_dict=space, # If you want to set the space yourself - metric="mean_loss", - mode="min", **zoopt_search_config) - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + scheduler = AsyncHyperBandScheduler() - tune.run( + analysis = tune.run( easy_objective, + metric="mean_loss", + mode="min", 
search_alg=zoopt_search, name="zoopt_search", scheduler=scheduler, - **tune_kwargs) + num_samples=num_samples, + config={ + "steps": 10, + "height": tune.quniform(-10, 10, 1e-2), + "width": tune.randint(0, 10) + }) + print("Best config found: ", analysis.best_config) diff --git a/python/ray/tune/tests/_test_cluster_interrupt_searcher.py b/python/ray/tune/tests/_test_cluster_interrupt_searcher.py index 105cb1225..3e7e868b1 100644 --- a/python/ray/tune/tests/_test_cluster_interrupt_searcher.py +++ b/python/ray/tune/tests/_test_cluster_interrupt_searcher.py @@ -2,7 +2,7 @@ import os import argparse from ray.tune import run -from ray.tune.examples.async_hyperband_example import MyTrainableClass +from ray.tune.utils._mock_trainable import MyTrainableClass from ray.tune.suggest.hyperopt import HyperOptSearch from ray.tune.suggest.suggestion import ConcurrencyLimiter diff --git a/python/ray/tune/tests/test_cluster.py b/python/ray/tune/tests/test_cluster.py index 29408d2fe..d9eb7e60b 100644 --- a/python/ray/tune/tests/test_cluster.py +++ b/python/ray/tune/tests/test_cluster.py @@ -25,6 +25,7 @@ from ray.tune.syncer import CloudSyncer, SyncerCallback, get_node_syncer from ray.tune.utils.trainable import TrainableUtil from ray.tune.trial import Trial from ray.tune.trial_runner import TrialRunner +from ray.tune.utils._mock_trainable import MyTrainableClass from ray.tune.utils.mock import (MockDurableTrainer, MockRemoteTrainer, MockNodeSyncer, mock_storage_client, MOCK_REMOTE_DIR) @@ -746,7 +747,6 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir): cluster = start_connected_cluster dirpath = str(tmpdir) local_checkpoint_dir = os.path.join(dirpath, "experiment") - from ray.tune.examples.async_hyperband_example import MyTrainableClass from ray.tune import register_trainable register_trainable("trainable", MyTrainableClass) @@ -770,6 +770,8 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir): if trials and len(trials) >= 10: break 
time.sleep(.5) + else: + raise ValueError(f"Didn't generate enough trials: {len(trials)}") if not TrialRunner.checkpoint_exists(local_checkpoint_dir): raise RuntimeError( @@ -792,8 +794,10 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir): runner = TrialRunner( resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir) trials = runner.get_trials() + if len(trials) == 0: continue # nonblocking script hasn't resumed yet, wait + reached = True assert len(trials) >= 10 assert len(trials) <= 20 diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py index 195f68ba0..911c5c32a 100644 --- a/python/ray/tune/tests/test_experiment_analysis.py +++ b/python/ray/tune/tests/test_experiment_analysis.py @@ -8,7 +8,7 @@ from numpy import nan import ray from ray import tune -from ray.tune.examples.async_hyperband_example import MyTrainableClass +from ray.tune.utils.mock import MyTrainableClass class ExperimentAnalysisSuite(unittest.TestCase): diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py index 779acf8ac..db95254fe 100644 --- a/python/ray/tune/tests/test_experiment_analysis_mem.py +++ b/python/ray/tune/tests/test_experiment_analysis_mem.py @@ -11,7 +11,7 @@ import numpy as np import ray from ray.tune import (run, Trainable, sample_from, Analysis, ExperimentAnalysis, grid_search) -from ray.tune.examples.async_hyperband_example import MyTrainableClass +from ray.tune.utils.mock import MyTrainableClass class ExperimentAnalysisInMemorySuite(unittest.TestCase): diff --git a/python/ray/tune/tests/test_tune_restore.py b/python/ray/tune/tests/test_tune_restore.py index 556c28e8f..507dca1d7 100644 --- a/python/ray/tune/tests/test_tune_restore.py +++ b/python/ray/tune/tests/test_tune_restore.py @@ -116,7 +116,7 @@ class TuneExampleTest(unittest.TestCase): validate_save_restore(MyTrainableClass, use_object_store=True) def 
testAsyncHyperbandExample(self): - from ray.tune.examples.async_hyperband_example import MyTrainableClass + from ray.tune.utils.mock import MyTrainableClass validate_save_restore(MyTrainableClass) validate_save_restore(MyTrainableClass, use_object_store=True) diff --git a/python/ray/tune/utils/_mock_trainable.py b/python/ray/tune/utils/_mock_trainable.py new file mode 100644 index 000000000..ea5ef00af --- /dev/null +++ b/python/ray/tune/utils/_mock_trainable.py @@ -0,0 +1,34 @@ +import os +import json +import numpy as np +from ray.tune import Trainable + + +class MyTrainableClass(Trainable): + """Example agent whose learning curve is a random sigmoid. + + The dummy hyperparameters "width" and "height" determine the slope and + maximum reward value reached. + """ + + def setup(self, config): + self.timestep = 0 + + def step(self): + self.timestep += 1 + v = np.tanh(float(self.timestep) / self.config.get("width", 1)) + v *= self.config.get("height", 1) + + # Here we use `episode_reward_mean`, but you can also report other + # objectives such as loss or accuracy. 
+ return {"episode_reward_mean": v} + + def save_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "w") as f: + f.write(json.dumps({"timestep": self.timestep})) + return path + + def load_checkpoint(self, checkpoint_path): + with open(checkpoint_path) as f: + self.timestep = json.loads(f.read())["timestep"] diff --git a/python/ray/tune/utils/mock.py b/python/ray/tune/utils/mock.py index 214b05726..cdc614b93 100644 --- a/python/ray/tune/utils/mock.py +++ b/python/ray/tune/utils/mock.py @@ -1,9 +1,11 @@ import os +import numpy as np +import json import ray.utils from ray.rllib.agents.mock import _MockTrainer -from ray.tune import DurableTrainable +from ray.tune import DurableTrainable, Trainable from ray.tune.sync_client import get_sync_client from ray.tune.syncer import NodeSyncer @@ -56,3 +58,33 @@ class MockDurableTrainer(DurableTrainable, _MockTrainer): def _create_storage_client(self): return mock_storage_client() + + +class MyTrainableClass(Trainable): + """Example agent whose learning curve is a random sigmoid. + + The dummy hyperparameters "width" and "height" determine the slope and + maximum reward value reached. + """ + + def setup(self, config): + self.timestep = 0 + + def step(self): + self.timestep += 1 + v = np.tanh(float(self.timestep) / self.config.get("width", 1)) + v *= self.config.get("height", 1) + + # Here we use `episode_reward_mean`, but you can also report other + # objectives such as loss or accuracy. + return {"episode_reward_mean": v} + + def save_checkpoint(self, checkpoint_dir): + path = os.path.join(checkpoint_dir, "checkpoint") + with open(path, "w") as f: + f.write(json.dumps({"timestep": self.timestep})) + return path + + def load_checkpoint(self, checkpoint_path): + with open(checkpoint_path) as f: + self.timestep = json.loads(f.read())["timestep"]