[tune] refactor and add examples (#11931)

This commit is contained in:
Richard Liaw
2020-11-14 20:43:28 -08:00
committed by GitHub
parent 5891759a3e
commit 8b3f79f307
57 changed files with 587 additions and 503 deletions
@@ -1,45 +1,27 @@
#!/usr/bin/env python
import argparse
import json
import os
import random
import numpy as np
import time
import ray
from ray.tune import Trainable, run, sample_from
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
class MyTrainableClass(Trainable):
"""Example agent whose learning curve is a random sigmoid.
def evaluation_fn(step, width, height):
time.sleep(0.1)
return (0.1 + width * step / 100)**(-1) + height * 0.1
The dummy hyperparameters "width" and "height" determine the slope and
maximum reward value reached.
"""
def setup(self, config):
self.timestep = 0
def easy_objective(config):
# Hyperparameters
width, height = config["width"], config["height"]
def step(self):
self.timestep += 1
v = np.tanh(float(self.timestep) / self.config.get("width", 1))
v *= self.config.get("height", 1)
# Here we use `episode_reward_mean`, but you can also report other
# objectives such as loss or accuracy.
return {"episode_reward_mean": v}
def save_checkpoint(self, checkpoint_dir):
path = os.path.join(checkpoint_dir, "checkpoint")
with open(path, "w") as f:
f.write(json.dumps({"timestep": self.timestep}))
return path
def load_checkpoint(self, checkpoint_path):
with open(checkpoint_path) as f:
self.timestep = json.loads(f.read())["timestep"]
for step in range(config["steps"]):
# Iterative training function - can be an arbitrary training procedure
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
if __name__ == "__main__":
@@ -48,31 +30,33 @@ if __name__ == "__main__":
"--smoke-test", action="store_true", help="Finish quickly for testing")
parser.add_argument(
"--ray-address",
help="Address of Ray cluster for seamless distributed execution.")
help="Address of Ray cluster for seamless distributed execution.",
required=False)
args, _ = parser.parse_known_args()
ray.init(address=args.ray_address)
# asynchronous hyperband early stopping, configured with
# `episode_reward_mean` as the
# objective and `training_iteration` as the time unit,
# which is automatically filled by Tune.
ahb = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="episode_reward_mean",
mode="max",
grace_period=5,
max_t=100)
# AsyncHyperBand enables aggressive early stopping of bad trials.
scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100)
run(MyTrainableClass,
# 'training_iteration' is incremented every time `trainable.step` is called
stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999}
analysis = tune.run(
easy_objective,
name="asynchyperband_test",
scheduler=ahb,
stop={"training_iteration": 1 if args.smoke_test else 99999},
metric="mean_loss",
mode="min",
scheduler=scheduler,
stop=stopping_criteria,
num_samples=20,
verbose=1,
resources_per_trial={
"cpu": 1,
"gpu": 0
},
config={
"width": sample_from(lambda spec: 10 + int(90 * random.random())),
"height": sample_from(lambda spec: int(100 * random.random())),
config={ # Hyperparameter space
"steps": 100,
"width": tune.uniform(10, 100),
"height": tune.uniform(0, 100),
})
print("Best hyperparameters found were: ", analysis.best_config)
+19 -25
View File
@@ -1,11 +1,10 @@
"""This test checks that AxSearch is functional.
"""This example demonstrates the usage of AxSearch with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import numpy as np
import time
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.ax import AxSearch
@@ -52,11 +51,21 @@ if __name__ == "__main__":
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 50,
"config": {
algo = AxSearch(
max_concurrent=4,
parameter_constraints=["x1 + x2 <= 2.0"], # Optional.
outcome_constraints=["l2norm <= 1.25"], # Optional.
)
scheduler = AsyncHyperBandScheduler()
analysis = tune.run(
easy_objective,
name="ax",
metric="hartmann6", # provided in the 'easy_objective' function
mode="min",
search_alg=algo,
scheduler=scheduler,
num_samples=10 if args.smoke_test else 50,
config={
"iterations": 100,
"x1": tune.uniform(0.0, 1.0),
"x2": tune.uniform(0.0, 1.0),
@@ -65,21 +74,6 @@ if __name__ == "__main__":
"x5": tune.uniform(0.0, 1.0),
"x6": tune.uniform(0.0, 1.0),
},
"stop": {
"timesteps_total": 100
}
}
algo = AxSearch(
max_concurrent=4,
metric="hartmann6",
mode="min",
parameter_constraints=["x1 + x2 <= 2.0"], # Optional.
outcome_constraints=["l2norm <= 1.25"], # Optional.
)
scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="min")
tune.run(
easy_objective,
name="ax",
search_alg=algo,
scheduler=scheduler,
**tune_kwargs)
stop={"timesteps_total": 100})
print("Best hyperparameters found were: ", analysis.best_config)
+10 -13
View File
@@ -1,10 +1,9 @@
"""This test checks that BayesOpt is functional.
"""This example demonstrates the usage of BayesOpt with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest import ConcurrencyLimiter
@@ -34,16 +33,7 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 1000,
"config": {
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100)
}
}
algo = BayesOptSearch(utility_kwargs={
"kind": "ucb",
"kappa": 2.5,
@@ -51,11 +41,18 @@ if __name__ == "__main__":
})
algo = ConcurrencyLimiter(algo, max_concurrent=4)
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
name="my_exp",
metric="mean_loss",
mode="min",
search_alg=algo,
scheduler=scheduler,
**tune_kwargs)
num_samples=10 if args.smoke_test else 1000,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100)
})
print("Best hyperparameters found were: ", analysis.best_config)
+8 -10
View File
@@ -63,24 +63,22 @@ if __name__ == "__main__":
# CS.CategoricalHyperparameter(
# "activation", choices=["relu", "tanh"]))
experiment_metrics = dict(metric="episode_reward_mean", mode="max")
bohb_hyperband = HyperBandForBOHB(
time_attr="training_iteration",
max_t=100,
reduction_factor=4,
**experiment_metrics)
time_attr="training_iteration", max_t=100, reduction_factor=4)
bohb_search = TuneBOHB(
# space=config_space, # If you want to set the space manually
max_concurrent=4,
**experiment_metrics)
max_concurrent=4)
tune.run(
analysis = tune.run(
MyTrainableClass,
name="bohb_test",
config=config,
scheduler=bohb_hyperband,
search_alg=bohb_search,
num_samples=10,
stop={"training_iteration": 100})
stop={"training_iteration": 100},
metric="episode_reward_mean",
mode="max")
print("Best hyperparameters found were: ", analysis.best_config)
+5 -9
View File
@@ -14,7 +14,6 @@ import torchvision
import torchvision.transforms as transforms
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
# __import_end__
@@ -187,21 +186,18 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
"batch_size": tune.choice([2, 4, 8, 16])
}
scheduler = ASHAScheduler(
metric="loss",
mode="min",
max_t=max_num_epochs,
grace_period=1,
reduction_factor=2)
reporter = CLIReporter(
# parameter_columns=["l1", "l2", "lr", "batch_size"],
metric_columns=["loss", "accuracy", "training_iteration"])
result = tune.run(
partial(train_cifar, data_dir=data_dir),
tune.with_parameters(train_cifar, data_dir=data_dir),
resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
config=config,
metric="loss",
mode="min",
num_samples=num_samples,
scheduler=scheduler,
progress_reporter=reporter)
scheduler=scheduler
)
best_trial = result.get_best_trial("loss", "min", "last")
print("Best trial config: {}".format(best_trial.config))
+8 -1
View File
@@ -72,4 +72,11 @@ if __name__ == "__main__":
ray.init(**options)
trainable_cls = DistributedTrainableCreator(
train_mnist, num_workers=args.num_workers, use_gpu=args.use_gpu)
tune.run(trainable_cls, num_samples=4, stop={"training_iteration": 10})
analysis = tune.run(
trainable_cls,
num_samples=4,
stop={"training_iteration": 10},
metric="mean_accuracy",
mode="max")
print("Best hyperparameters found were: ", analysis.best_config)
+12 -15
View File
@@ -1,4 +1,4 @@
"""This test checks that Dragonfly is functional.
"""This example demonstrates the usage of Dragonfly with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
@@ -9,7 +9,6 @@ from __future__ import print_function
import numpy as np
import time
import ray
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
@@ -37,17 +36,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 50,
"config": {
"iterations": 100,
"LiNO3_vol": tune.uniform(0, 7),
"Li2SO4_vol": tune.uniform(0, 7),
"NaClO4_vol": tune.uniform(0, 7)
},
}
# Optional: Pass the parameter space yourself
# space = [{
@@ -75,11 +63,20 @@ if __name__ == "__main__":
df_search = ConcurrencyLimiter(df_search, max_concurrent=4)
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
objective,
metric="objective",
mode="max",
name="dragonfly_search",
search_alg=df_search,
scheduler=scheduler,
**tune_kwargs)
num_samples=10 if args.smoke_test else 50,
config={
"iterations": 100,
"LiNO3_vol": tune.uniform(0, 7),
"Li2SO4_vol": tune.uniform(0, 7),
"NaClO4_vol": tune.uniform(0, 7)
},
)
print("Best hyperparameters found were: ", analysis.best_config)
+8 -7
View File
@@ -1,8 +1,7 @@
"""This test checks that GeneticSearch is functional.
"""This example demonstrates the usage of GeneticSearch with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.automl import GeneticSearch
@@ -30,7 +29,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
space = SearchSpace({
ContinuousSpace("x1", 0, 4, 100),
@@ -40,16 +38,19 @@ if __name__ == "__main__":
DiscreteSpace("x5", [-1, 0, 1, 2, 3]),
})
config = {"stop": {"training_iteration": 100}}
algo = GeneticSearch(
space,
reward_attr="neg_mean_loss",
max_generation=2 if args.smoke_test else 10,
population_size=10 if args.smoke_test else 50)
scheduler = AsyncHyperBandScheduler(metric="neg_mean_loss", mode="max")
tune.run(
scheduler = AsyncHyperBandScheduler()
analysis = tune.run(
michalewicz_function,
metric="neg_mean_loss",
mode="max",
name="my_exp",
search_alg=algo,
scheduler=scheduler,
**config)
stop={"training_iteration": 100})
print("Best hyperparameters found were: ", analysis.best_config)
+3 -1
View File
@@ -112,7 +112,9 @@ if __name__ == "__main__":
replicate_pem=False)
analysis = tune.run(
horovod_trainable,
metric="loss",
mode="min",
config={"lr": tune.uniform(0.1, 1)},
num_samples=2 if args.smoke_test else 10,
fail_fast=True)
config = analysis.get_best_config(metric="loss", mode="min")
print("Best hyperparameters found were: ", analysis.best_config)
@@ -51,16 +51,14 @@ if __name__ == "__main__":
# Hyperband early stopping, configured with `episode_reward_mean` as the
# objective and `training_iteration` as the time unit,
# which is automatically filled by Tune.
hyperband = HyperBandScheduler(
time_attr="training_iteration",
metric="episode_reward_mean",
mode="max",
max_t=200)
hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200)
tune.run(
analysis = tune.run(
MyTrainableClass,
name="hyperband_test",
num_samples=20,
metric="episode_reward_mean",
mode="max",
stop={"training_iteration": 1 if args.smoke_test else 99999},
config={
"width": tune.randint(10, 90),
@@ -68,3 +66,5 @@ if __name__ == "__main__":
},
scheduler=hyperband,
fail_fast=True)
print("Best hyperparameters found were: ", analysis.best_config)
@@ -21,6 +21,8 @@ def train(config, checkpoint_dir=None):
v = np.tanh(float(timestep) / config.get("width", 1))
v *= config.get("height", 1)
# Checkpoint the state of the training every 3 steps
# Note that this is only required for certain schedulers
if timestep % 3 == 0:
with tune.checkpoint_dir(step=timestep) as checkpoint_dir:
path = os.path.join(checkpoint_dir, "checkpoint")
@@ -42,17 +44,16 @@ if __name__ == "__main__":
# Hyperband early stopping, configured with `episode_reward_mean` as the
# objective and `training_iteration` as the time unit,
# which is automatically filled by Tune.
hyperband = HyperBandScheduler(
time_attr="training_iteration",
metric="episode_reward_mean",
mode="max",
max_t=200)
hyperband = HyperBandScheduler(max_t=200)
tune.run(
analysis = tune.run(
train,
name="hyperband_test",
num_samples=20,
metric="episode_reward_mean",
mode="max",
stop={"training_iteration": 10 if args.smoke_test else 99999},
config={"height": tune.uniform(0, 100)},
scheduler=hyperband,
fail_fast=True)
print("Best hyperparameters found were: ", analysis.best_config)
+13 -14
View File
@@ -1,4 +1,4 @@
"""This test checks that HyperOpt is functional.
"""This example demonstrates the usage of HyperOpt with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
@@ -12,6 +12,7 @@ from ray.tune.suggest.hyperopt import HyperOptSearch
def evaluation_fn(step, width, height):
time.sleep(0.1)
return (0.1 + width * step / 100)**(-1) + height * 0.1
@@ -24,7 +25,6 @@ def easy_objective(config):
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
time.sleep(0.1)
if __name__ == "__main__":
@@ -49,24 +49,23 @@ if __name__ == "__main__":
}
]
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 1000,
"config": {
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
# This is an ignored parameter.
"activation": tune.choice(["relu", "tanh"])
}
}
algo = HyperOptSearch(points_to_evaluate=current_best_params)
algo = ConcurrencyLimiter(algo, max_concurrent=4)
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
search_alg=algo,
scheduler=scheduler,
metric="mean_loss",
mode="min",
**tune_kwargs)
num_samples=10 if args.smoke_test else 1000,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
# This is an ignored parameter.
"activation": tune.choice(["relu", "tanh"])
})
print("Best hyperparameters found were: ", analysis.best_config)
+5 -2
View File
@@ -5,6 +5,7 @@ import sklearn.metrics
from sklearn.model_selection import train_test_split
from ray import tune
from ray.tune.schedulers import ASHAScheduler
def LightGBMCallback(env):
@@ -41,11 +42,13 @@ if __name__ == "__main__":
"num_leaves": tune.randint(10, 1000),
"learning_rate": tune.loguniform(1e-8, 1e-1)
}
from ray.tune.schedulers import ASHAScheduler
tune.run(
analysis = tune.run(
train_breast_cancer,
metric="binary_error",
mode="min",
config=config,
num_samples=2,
scheduler=ASHAScheduler())
print("Best hyperparameters found were: ", analysis.best_config)
+18 -31
View File
@@ -1,12 +1,9 @@
#!/usr/bin/env python
import argparse
import json
import os
import numpy as np
import time
from ray import tune
from ray.tune import Trainable, run
class TestLogger(tune.logger.Logger):
@@ -18,34 +15,20 @@ def trial_str_creator(trial):
return "{}_{}_123".format(trial.trainable_name, trial.trial_id)
class MyTrainableClass(Trainable):
"""Example agent whose learning curve is a random sigmoid.
def evaluation_fn(step, width, height):
time.sleep(0.1)
return (0.1 + width * step / 100)**(-1) + height * 0.1
The dummy hyperparameters "width" and "height" determine the slope and
maximum reward value reached.
"""
def setup(self, config):
self.timestep = 0
def easy_objective(config):
# Hyperparameters
width, height = config["width"], config["height"]
def step(self):
self.timestep += 1
v = np.tanh(float(self.timestep) / self.config.get("width", 1))
v *= self.config.get("height", 1)
# Here we use `episode_reward_mean`, but you can also report other
# objectives such as loss or accuracy.
return {"episode_reward_mean": v}
def save_checkpoint(self, checkpoint_dir):
path = os.path.join(checkpoint_dir, "checkpoint")
with open(path, "w") as f:
f.write(json.dumps({"timestep": self.timestep}))
return path
def load_checkpoint(self, checkpoint_path):
with open(checkpoint_path) as f:
self.timestep = json.loads(f.read())["timestep"]
for step in range(config["steps"]):
# Iterative training function - can be an arbitrary training procedure
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
if __name__ == "__main__":
@@ -54,14 +37,18 @@ if __name__ == "__main__":
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
trials = run(
MyTrainableClass,
analysis = tune.run(
easy_objective,
name="hyperband_test",
metric="mean_loss",
mode="min",
num_samples=5,
trial_name_creator=trial_str_creator,
loggers=[TestLogger],
stop={"training_iteration": 1 if args.smoke_test else 99999},
config={
"steps": 100,
"width": tune.randint(10, 100),
"height": tune.loguniform(10, 100)
})
print("Best hyperparameters: ", analysis.best_config)
+3 -1
View File
@@ -90,7 +90,7 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
data_dir=data_dir,
num_epochs=num_epochs,
num_gpus=gpus_per_trial)
tune.run(
analysis = tune.run(
trainable,
resources_per_trial={
"cpu": 1,
@@ -102,6 +102,8 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
num_samples=num_samples,
name="tune_mnist")
print("Best hyperparameters found were: ", analysis.best_config)
if __name__ == "__main__":
import argparse
@@ -93,7 +93,6 @@ class LightningMNISTClassifier(pl.LightningModule):
self.log("ptl/val_loss", avg_loss)
self.log("ptl/val_accuracy", avg_acc)
@staticmethod
def download_data(data_dir):
transform = transforms.Compose([
@@ -177,7 +176,8 @@ def train_mnist_tune_checkpoint(config,
ckpt = pl_load(
os.path.join(checkpoint_dir, "checkpoint"),
map_location=lambda storage, loc: storage)
model = LightningMNISTClassifier._load_model_state(ckpt, config=config, data_dir=data_dir)
model = LightningMNISTClassifier._load_model_state(
ckpt, config=config, data_dir=data_dir)
trainer.current_epoch = ckpt["epoch"]
else:
model = LightningMNISTClassifier(config=config, data_dir=data_dir)
@@ -199,8 +199,6 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
}
scheduler = ASHAScheduler(
metric="loss",
mode="min",
max_t=num_epochs,
grace_period=1,
reduction_factor=2)
@@ -209,7 +207,7 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
metric_columns=["loss", "mean_accuracy", "training_iteration"])
tune.run(
analysis = tune.run(
tune.with_parameters(
train_mnist_tune,
data_dir=data_dir,
@@ -219,12 +217,16 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
"cpu": 1,
"gpu": gpus_per_trial
},
metric="loss",
mode="min",
config=config,
num_samples=num_samples,
scheduler=scheduler,
progress_reporter=reporter,
name="tune_mnist_asha")
print("Best hyperparameters found were: ", analysis.best_config)
shutil.rmtree(data_dir)
# __tune_asha_end__
@@ -242,9 +244,6 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
}
scheduler = PopulationBasedTraining(
time_attr="training_iteration",
metric="loss",
mode="min",
perturbation_interval=4,
hyperparam_mutations={
"lr": tune.loguniform(1e-4, 1e-1),
@@ -255,7 +254,7 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
metric_columns=["loss", "mean_accuracy", "training_iteration"])
tune.run(
analysis = tune.run(
tune.with_parameters(
train_mnist_tune_checkpoint,
data_dir=data_dir,
@@ -265,12 +264,16 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
"cpu": 1,
"gpu": gpus_per_trial
},
metric="loss",
mode="min",
config=config,
num_samples=num_samples,
scheduler=scheduler,
progress_reporter=reporter,
name="tune_mnist_pbt")
print("Best hyperparameters found were: ", analysis.best_config)
shutil.rmtree(data_dir)
# __tune_pbt_end__
@@ -88,5 +88,4 @@ if __name__ == "__main__":
"momentum": tune.uniform(0.1, 0.9),
})
print("Best config is:",
analysis.get_best_config(metric="mean_accuracy", mode="max"))
print("Best config is:", analysis.best_config)
+12 -16
View File
@@ -1,8 +1,6 @@
from functools import partial
import mxnet as mx
from ray import tune, logger
from ray.tune import CLIReporter
from ray.tune.integration.mxnet import TuneCheckpointCallback, \
TuneReportCallback
from ray.tune.schedulers import ASHAScheduler
@@ -59,25 +57,21 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10):
}
scheduler = ASHAScheduler(
metric="mean_accuracy",
mode="max",
max_t=num_epochs,
grace_period=1,
reduction_factor=2)
max_t=num_epochs, grace_period=1, reduction_factor=2)
reporter = CLIReporter(
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"])
tune.run(
partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
analysis = tune.run(
tune.with_parameters(
train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
resources_per_trial={
"cpu": 1,
},
metric="mean_accuracy",
mode="max",
config=config,
num_samples=num_samples,
scheduler=scheduler,
progress_reporter=reporter,
name="tune_mnist_mxnet")
return analysis
if __name__ == "__main__":
@@ -89,6 +83,8 @@ if __name__ == "__main__":
args, _ = parser.parse_known_args()
if args.smoke_test:
tune_mnist_mxnet(num_samples=1, num_epochs=1)
analysis = tune_mnist_mxnet(num_samples=1, num_epochs=1)
else:
tune_mnist_mxnet(num_samples=10, num_epochs=10)
analysis = tune_mnist_mxnet(num_samples=10, num_epochs=10)
print("Best hyperparameters found were: ", analysis.best_config)
+11 -16
View File
@@ -1,10 +1,9 @@
"""This test checks that Nevergrad is functional.
"""This example demonstrates the usage of Nevergrad with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
@@ -35,18 +34,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
# The config will be automatically converted to Nevergrad's search space
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 50,
"config": {
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
"activation": tune.choice(["relu", "tanh"])
}
}
# Optional: Pass the parameter space yourself
# space = ng.p.Dict(
@@ -63,11 +50,19 @@ if __name__ == "__main__":
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
metric="mean_loss",
mode="min",
name="nevergrad",
search_alg=algo,
scheduler=scheduler,
**tune_kwargs)
num_samples=10 if args.smoke_test else 50,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
"activation": tune.choice(["relu", "tanh"])
})
print("Best hyperparameters found were: ", analysis.best_config)
+12 -13
View File
@@ -1,4 +1,4 @@
"""This test checks that Optuna is functional.
"""This example demonstrates the usage of Optuna with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
@@ -36,23 +36,22 @@ if __name__ == "__main__":
args, _ = parser.parse_known_args()
ray.init(configure_logging=False)
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 100,
"config": {
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
# This is an ignored parameter.
"activation": tune.choice(["relu", "tanh"])
}
}
algo = OptunaSearch()
algo = ConcurrencyLimiter(algo, max_concurrent=4)
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
metric="mean_loss",
mode="min",
search_alg=algo,
scheduler=scheduler,
**tune_kwargs)
num_samples=10 if args.smoke_test else 100,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
# This is an ignored parameter.
"activation": tune.choice(["relu", "tanh"])
})
print("Best hyperparameters found were: ", analysis.best_config)
+5 -4
View File
@@ -18,19 +18,18 @@ if __name__ == "__main__":
ray.init()
pbt = PB2(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=20,
hyperparam_bounds={
# hyperparameter bounds.
"lr": [0.0001, 0.02],
})
tune.run(
analysis = tune.run(
pbt_function,
name="pbt_test",
scheduler=pbt,
metric="mean_accuracy",
mode="max",
verbose=False,
stop={
"training_iteration": 30,
@@ -43,3 +42,5 @@ if __name__ == "__main__":
# the model training in this example
"some_other_factor": 1,
})
print("Best hyperparameters found were: ", analysis.best_config)
@@ -4,7 +4,6 @@ import argparse
import pandas as pd
from datetime import datetime
import ray
from ray.tune import run, sample_from
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune.schedulers.pb2 import PB2
@@ -46,7 +45,6 @@ if __name__ == "__main__":
parser.add_argument("--save_csv", type=bool, default=False)
args = parser.parse_args()
ray.init()
# bipedalwalker needs 1600
if args.env_name in ["BipedalWalker-v2", "BipedalWalker-v3"]:
@@ -86,8 +86,6 @@ if __name__ == "__main__":
# __pbt_begin__
scheduler = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=5,
hyperparam_mutations={
# distribution for resampling
@@ -118,6 +116,8 @@ if __name__ == "__main__":
name="pbt_test",
scheduler=scheduler,
reuse_actors=True,
metric="mean_accuracy",
mode="max",
verbose=1,
stop=stopper,
export_formats=[ExportFormat.MODEL],
@@ -131,9 +131,8 @@ if __name__ == "__main__":
})
# __tune_end__
best_trial = analysis.get_best_trial("mean_accuracy", "max")
best_checkpoint = analysis.get_best_checkpoint(
best_trial, metric="mean_accuracy", mode="max")
best_trial = analysis.best_trial
best_checkpoint = analysis.best_checkpoint
restored_trainable = PytorchTrainable()
restored_trainable.restore(best_checkpoint)
best_model = restored_trainable.model
@@ -10,7 +10,6 @@ from torchvision import datasets
from ray.tune.examples.mnist_pytorch import train, test, ConvNet,\
get_data_loaders
import ray
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune.trial import ExportFormat
@@ -66,14 +65,11 @@ if __name__ == "__main__":
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
datasets.MNIST("~/data", train=True, download=True)
# __pbt_begin__
scheduler = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=5,
hyperparam_mutations={
# distribution for resampling
@@ -104,6 +100,8 @@ if __name__ == "__main__":
train_convnet,
name="pbt_test",
scheduler=scheduler,
metric="mean_accuracy",
mode="max",
verbose=1,
stop=stopper,
export_formats=[ExportFormat.MODEL],
@@ -116,9 +114,8 @@ if __name__ == "__main__":
})
# __tune_end__
best_trial = analysis.get_best_trial("mean_accuracy", mode="max")
best_checkpoint_path = analysis.get_best_checkpoint(
best_trial, metric="mean_accuracy", mode="max")
best_trial = analysis.best_trial
best_checkpoint_path = analysis.best_checkpoint
best_model = ConvNet()
best_checkpoint = torch.load(
os.path.join(best_checkpoint_path, "checkpoint"))
@@ -9,7 +9,6 @@ from ray.tune.schedulers import PopulationBasedTraining
import argparse
import os
from filelock import FileLock
import random
import torch
import torch.nn as nn
import torch.nn.parallel
@@ -105,9 +104,6 @@ if __name__ == "__main__":
mnist_model_ref = ray.put(mnist_cnn)
scheduler = PopulationBasedTraining(
time_attr="training_iteration",
metric="is_score",
mode="max",
perturbation_interval=5,
hyperparam_mutations={
# distribution for resampling
@@ -124,12 +120,12 @@ if __name__ == "__main__":
stop={
"training_iteration": tune_iter,
},
metric="is_score",
mode="max",
num_samples=8,
config={
"netG_lr": tune.sample_from(
lambda spec: random.choice([0.0001, 0.0002, 0.0005])),
"netD_lr": tune.sample_from(
lambda spec: random.choice([0.0001, 0.0002, 0.0005])),
"netG_lr": tune.choice([0.0001, 0.0002, 0.0005]),
"netD_lr": tune.choice([0.0001, 0.0002, 0.0005]),
"mnist_model_ref": mnist_model_ref
})
# __tune_end__
@@ -127,8 +127,6 @@ if __name__ == "__main__":
# __tune_begin__
scheduler = PopulationBasedTraining(
time_attr="training_iteration",
metric="is_score",
mode="max",
perturbation_interval=5,
hyperparam_mutations={
# distribution for resampling
@@ -143,6 +141,8 @@ if __name__ == "__main__":
scheduler=scheduler,
reuse_actors=True,
verbose=1,
metric="is_score",
mode="max",
checkpoint_at_end=True,
stop={
"training_iteration": tune_iter,
+7 -5
View File
@@ -5,11 +5,11 @@ import argparse
import random
import ray
from ray.tune import Trainable, run
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining
class PBTBenchmarkExample(Trainable):
class PBTBenchmarkExample(tune.Trainable):
"""Toy PBT problem for benchmarking adaptive learning rate.
The goal is to optimize this trainable's accuracy. The accuracy increases
@@ -93,8 +93,6 @@ if __name__ == "__main__":
pbt = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=20,
hyperparam_mutations={
# distribution for resampling
@@ -103,10 +101,12 @@ if __name__ == "__main__":
"some_other_factor": [1, 2],
})
run(
analysis = tune.run(
PBTBenchmarkExample,
name="pbt_test",
scheduler=pbt,
metric="mean_accuracy",
mode="max",
reuse_actors=True,
checkpoint_freq=20,
verbose=False,
@@ -120,3 +120,5 @@ if __name__ == "__main__":
# the model training in this example
"some_other_factor": 1,
})
print("Best hyperparameters found were: ", analysis.best_config)
+7 -4
View File
@@ -75,7 +75,8 @@ def pbt_function(config, checkpoint_dir=None):
cur_lr=lr,
optimal_lr=optimal_lr, # for debugging
q_err=q_err, # for debugging
done=accuracy > midpoint * 2)
done=accuracy > midpoint * 2 # this stops the training process
)
if __name__ == "__main__":
@@ -90,8 +91,6 @@ if __name__ == "__main__":
pbt = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=4,
hyperparam_mutations={
# distribution for resampling
@@ -100,11 +99,13 @@ if __name__ == "__main__":
"some_other_factor": [1, 2],
})
tune.run(
analysis = tune.run(
pbt_function,
name="pbt_test",
scheduler=pbt,
verbose=False,
metric="mean_accuracy",
mode="max",
stop={
"training_iteration": 30,
},
@@ -116,3 +117,5 @@ if __name__ == "__main__":
# the model training in this example
"some_other_factor": 1,
})
print("Best hyperparameters found were: ", analysis.best_config)
@@ -268,9 +268,6 @@ if __name__ == "__main__":
read_data()
pbt = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=2,
hyperparam_mutations={
"dropout": lambda: np.random.uniform(0, 1),
@@ -282,6 +279,8 @@ if __name__ == "__main__":
MemNNModel,
name="pbt_babi_memnn",
scheduler=pbt,
metric="mean_accuracy",
mode="max",
stop={"training_iteration": 4 if args.smoke_test else 100},
num_samples=2,
config={
+9 -12
View File
@@ -11,8 +11,7 @@ computationally demanding example.
import random
import ray
from ray.tune import run, sample_from
from ray import tune
from ray.tune.schedulers import PopulationBasedTraining
if __name__ == "__main__":
@@ -29,8 +28,6 @@ if __name__ == "__main__":
pbt = PopulationBasedTraining(
time_attr="time_total_s",
metric="episode_reward_mean",
mode="max",
perturbation_interval=120,
resample_probability=0.25,
# Specifies the mutations of these hyperparams
@@ -44,12 +41,13 @@ if __name__ == "__main__":
},
custom_explore_fn=explore)
ray.init()
run(
analysis = tune.run(
"PPO",
name="pbt_humanoid_test",
scheduler=pbt,
num_samples=8,
metric="episode_reward_mean",
mode="max",
config={
"env": "Humanoid-v1",
"kl_coeff": 1.0,
@@ -63,10 +61,9 @@ if __name__ == "__main__":
"clip_param": 0.2,
"lr": 1e-4,
# These params start off randomly drawn from a set.
"num_sgd_iter": sample_from(
lambda spec: random.choice([10, 20, 30])),
"sgd_minibatch_size": sample_from(
lambda spec: random.choice([128, 512, 2048])),
"train_batch_size": sample_from(
lambda spec: random.choice([10000, 20000, 40000]))
"num_sgd_iter": tune.choice([10, 20, 30]),
"sgd_minibatch_size": tune.choice([128, 512, 2048]),
"train_batch_size": tune.choice([10000, 20000, 40000])
})
print("best hyperparameters: ", analysis.best_config)
@@ -22,8 +22,7 @@ from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D
from tensorflow.python.keras.models import Model, load_model
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
import ray
from ray.tune import grid_search, run, sample_from
from ray import tune
from ray.tune import Trainable
from ray.tune.schedulers import PopulationBasedTraining
@@ -184,38 +183,39 @@ if __name__ == "__main__":
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
train_spec = {
"resources_per_trial": {
"cpu": 1,
"gpu": 1
},
"stop": {
"mean_accuracy": 0.80,
"training_iteration": 30,
},
"config": {
"epochs": 1,
"batch_size": 64,
"lr": grid_search([10**-4, 10**-5]),
"decay": sample_from(lambda spec: spec.config.lr / 100.0),
"dropout": grid_search([0.25, 0.5]),
},
"num_samples": 4,
space = {
"epochs": 1,
"batch_size": 64,
"lr": tune.grid_search([10**-4, 10**-5]),
"decay": tune.sample_from(lambda spec: spec.config.lr / 100.0),
"dropout": tune.grid_search([0.25, 0.5]),
}
if args.smoke_test:
train_spec["config"]["lr"] = 10**-4
train_spec["config"]["dropout"] = 0.5
ray.init()
space["lr"] = 10**-4
space["dropout"] = 0.5
pbt = PopulationBasedTraining(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
perturbation_interval=10,
hyperparam_mutations={
"dropout": lambda _: np.random.uniform(0, 1),
})
run(Cifar10Model, name="pbt_cifar10", scheduler=pbt, **train_spec)
analysis = tune.run(
Cifar10Model,
name="pbt_cifar10",
scheduler=pbt,
resources_per_trial={
"cpu": 1,
"gpu": 1
},
stop={
"mean_accuracy": 0.80,
"training_iteration": 30,
},
config=space,
num_samples=4,
metric="mean_accuracy",
mode="max",
)
print("Best hyperparameters found were: ", analysis.best_config)
+6 -12
View File
@@ -1,10 +1,9 @@
"""This test checks that SigOpt is functional.
"""This example demonstrates the usage of SigOpt with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.sigopt import SigOptSearch
@@ -37,7 +36,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
space = [
{
@@ -57,13 +55,6 @@ if __name__ == "__main__":
},
},
]
config = {
"num_samples": 10 if args.smoke_test else 1000,
"config": {
"steps": 10
}
}
algo = SigOptSearch(
space,
name="SigOpt Example Experiment",
@@ -71,9 +62,12 @@ if __name__ == "__main__":
metric="mean_loss",
mode="min")
scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
tune.run(
analysis = tune.run(
easy_objective,
name="my_exp",
search_alg=algo,
scheduler=scheduler,
**config)
num_samples=10 if args.smoke_test else 1000,
config={"steps": 10})
print("Best hyperparameters found were: ", analysis.best_config)
@@ -1,13 +1,9 @@
"""This test checks that SigOpt is functional.
"""Example using Sigopt's multi-objective functionality."""
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
import numpy as np
from ray import tune
from ray.tune.schedulers import FIFOScheduler
from ray.tune.suggest.sigopt import SigOptSearch
np.random.seed(0)
@@ -41,7 +37,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
space = [
{
@@ -54,13 +49,6 @@ if __name__ == "__main__":
},
]
config = {
"num_samples": 10 if args.smoke_test else 1000,
"config": {
"total_weight": 1
}
}
algo = SigOptSearch(
space,
name="SigOpt Example Multi Objective Experiment",
@@ -69,11 +57,10 @@ if __name__ == "__main__":
metric=["average", "std", "sharpe"],
mode=["max", "min", "obs"])
scheduler = FIFOScheduler()
tune.run(
analysis = tune.run(
easy_objective,
name="my_exp",
search_alg=algo,
scheduler=scheduler,
**config)
num_samples=10 if args.smoke_test else 1000,
config={"total_weight": 1})
print("Best hyperparameters found were: ", analysis.best_config)
@@ -1,14 +1,8 @@
"""This test checks that SigOpt is functional.
""""Example using Sigopt's support for prior beliefs."""
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
import numpy as np
from ray import tune
from ray.tune.schedulers import FIFOScheduler
from ray.tune.suggest.sigopt import SigOptSearch
np.random.seed(0)
@@ -36,7 +30,6 @@ def easy_objective(config):
average, std = evaluate(w1, w2, w3)
tune.report(average=average, std=std)
time.sleep(0.1)
if __name__ == "__main__":
@@ -51,8 +44,6 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
samples = 10 if args.smoke_test else 1000
conn = Connection(client_token=os.environ["SIGOPT_KEY"])
@@ -90,8 +81,6 @@ if __name__ == "__main__":
observation_budget=samples,
parallel_bandwidth=1)
config = {"num_samples": samples, "config": {}}
algo = SigOptSearch(
connection=conn,
experiment_id=experiment.id,
@@ -100,11 +89,10 @@ if __name__ == "__main__":
metric=["average", "std"],
mode=["obs", "min"])
scheduler = FIFOScheduler()
tune.run(
analysis = tune.run(
easy_objective,
name="my_exp",
search_alg=algo,
scheduler=scheduler,
**config)
num_samples=samples,
config={})
print("Best hyperparameters found were: ", analysis.best_config)
+11 -15
View File
@@ -1,10 +1,9 @@
"""This test checks that Skopt is functional.
"""This example demonstrates the usage of Skopt with Ray Tune.
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
@@ -12,6 +11,7 @@ from ray.tune.suggest.skopt import SkOptSearch
def evaluation_fn(step, width, height):
    """Return the synthetic loss for one training step.

    Sleeps for 0.1 seconds, then evaluates
    ``(0.1 + width * step / 100) ** -1 + height * 0.1``.

    Args:
        step: Index of the current training iteration.
        width: Hyperparameter multiplying ``step`` in the denominator term.
        height: Hyperparameter contributing a constant ``0.1 * height``
            offset to the result.

    Returns:
        The loss value computed for this step.
    """
    time.sleep(0.1)
    return (0.1 + width * step / 100)**(-1) + height * 0.1
@@ -24,7 +24,6 @@ def easy_objective(config):
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
time.sleep(0.1)
if __name__ == "__main__":
@@ -34,18 +33,8 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
# The config will be automatically converted to SkOpt's search space
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 50,
"config": {
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
"activation": tune.choice(["relu", "tanh"])
}
}
# Optional: Pass the parameter space yourself
# space = {
@@ -66,11 +55,18 @@ if __name__ == "__main__":
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
metric="mean_loss",
mode="min",
name="skopt_exp_with_warmstart",
search_alg=algo,
scheduler=scheduler,
**tune_kwargs)
num_samples=10 if args.smoke_test else 50,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
"activation": tune.choice(["relu", "tanh"])
})
print("Best hyperparameters found were: ", analysis.best_config)
@@ -86,16 +86,15 @@ if __name__ == "__main__":
use_gpu=args.use_gpu,
num_workers=2,
)
sched = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
max_t=400,
grace_period=20)
tune.run(
sched = AsyncHyperBandScheduler(max_t=400, grace_period=20)
analysis = tune.run(
tf_trainable,
name="exp",
scheduler=sched,
metric="mean_accuracy",
mode="max",
stop={
"mean_accuracy": 0.99,
"training_iteration": 10
@@ -108,3 +107,4 @@ if __name__ == "__main__":
"hidden": tune.sample_from(
lambda spec: np.random.randint(32, 512)),
})
print("Best hyperparameters found were: ", analysis.best_config)
+5 -1
View File
@@ -116,8 +116,12 @@ class MNISTTrainable(tune.Trainable):
if __name__ == "__main__":
load_data() # we download data on the driver to avoid race conditions.
tune.run(
analysis = tune.run(
MNISTTrainable,
metric="test_loss",
mode="min",
stop={"training_iteration": 5 if args.smoke_test else 50},
verbose=1,
config={"hiddens": tune.grid_search([32, 64, 128])})
print("Best hyperparameters found were: ", analysis.best_config)
+6 -16
View File
@@ -13,6 +13,7 @@ from mxnet.gluon.data.vision import transforms
from gluoncv.model_zoo import get_model
from gluoncv.data import transforms as gcv_transforms
from ray.tune.schedulers import create_scheduler
from ray import tune
# Training settings
@@ -185,19 +186,9 @@ def train_cifar10(config):
if __name__ == "__main__":
args = parser.parse_args()
sched = create_scheduler(args.scheduler)
import ray
from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler
ray.init()
if args.scheduler == "fifo":
sched = FIFOScheduler()
elif args.scheduler == "asynchyperband":
sched = AsyncHyperBandScheduler(
metric="mean_loss", mode="min", max_t=400, grace_period=60)
else:
raise NotImplementedError
tune.run(
analysis = tune.run(
train_cifar10,
name=args.expname,
verbose=2,
@@ -213,8 +204,7 @@ if __name__ == "__main__":
num_samples=1 if args.smoke_test else args.num_samples,
config={
"args": args,
"lr": tune.sample_from(
lambda spec: np.power(10.0, np.random.uniform(-4, -1))),
"momentum": tune.sample_from(
lambda spec: np.random.uniform(0.85, 0.95)),
"lr": tune.loguniform(1e-4, 1e-1),
"momentum": tune.uniform(0.85, 0.95),
})
print("Best hyperparameters found were: ", analysis.best_config)
+8 -12
View File
@@ -1,5 +1,4 @@
import argparse
import numpy as np
from tensorflow.keras.datasets import mnist
from ray.tune.integration.keras import TuneReportCallback
@@ -52,16 +51,14 @@ if __name__ == "__main__":
ray.init(num_cpus=4 if args.smoke_test else None)
sched = AsyncHyperBandScheduler(
time_attr="training_iteration",
metric="mean_accuracy",
mode="max",
max_t=400,
grace_period=20)
time_attr="training_iteration", max_t=400, grace_period=20)
tune.run(
analysis = tune.run(
train_mnist,
name="exp",
scheduler=sched,
metric="mean_accuracy",
mode="max",
stop={
"mean_accuracy": 0.99,
"training_iteration": 5 if args.smoke_test else 300
@@ -73,9 +70,8 @@ if __name__ == "__main__":
},
config={
"threads": 2,
"lr": tune.sample_from(lambda spec: np.random.uniform(0.001, 0.1)),
"momentum": tune.sample_from(
lambda spec: np.random.uniform(0.1, 0.9)),
"hidden": tune.sample_from(
lambda spec: np.random.randint(32, 512)),
"lr": tune.uniform(0.001, 0.1),
"momentum": tune.uniform(0.1, 0.9),
"hidden": tune.randint(32, 512),
})
print("Best hyperparameters found were: ", analysis.best_config)
+12 -3
View File
@@ -20,8 +20,10 @@ def train_function(config, checkpoint_dir=None):
def tune_function(api_key_file):
"""Example for using a WandbLogger with the function API"""
tune.run(
analysis = tune.run(
train_function,
metric="loss",
mode="min",
config={
"mean": tune.grid_search([1, 2, 3, 4, 5]),
"sd": tune.uniform(0.2, 0.8),
@@ -31,6 +33,7 @@ def tune_function(api_key_file):
}
},
loggers=DEFAULT_LOGGERS + (WandbLogger, ))
return analysis.best_config
@wandb_mixin
@@ -43,8 +46,10 @@ def decorated_train_function(config, checkpoint_dir=None):
def tune_decorated(api_key_file):
"""Example for using the @wandb_mixin decorator with the function API"""
tune.run(
analysis = tune.run(
decorated_train_function,
metric="loss",
mode="min",
config={
"mean": tune.grid_search([1, 2, 3, 4, 5]),
"sd": tune.uniform(0.2, 0.8),
@@ -53,6 +58,7 @@ def tune_decorated(api_key_file):
"project": "Wandb_example"
}
})
return analysis.best_config
class WandbTrainable(WandbTrainableMixin, Trainable):
@@ -65,8 +71,10 @@ class WandbTrainable(WandbTrainableMixin, Trainable):
def tune_trainable(api_key_file):
"""Example for using a WandbTrainableMixin with the class API"""
tune.run(
analysis = tune.run(
WandbTrainable,
metric="loss",
mode="min",
config={
"mean": tune.grid_search([1, 2, 3, 4, 5]),
"sd": tune.uniform(0.2, 0.8),
@@ -75,6 +83,7 @@ def tune_trainable(api_key_file):
"project": "Wandb_example"
}
})
return analysis.best_config
if __name__ == "__main__":
+10 -6
View File
@@ -1,5 +1,6 @@
import sklearn.datasets
import sklearn.metrics
import os
from ray.tune.schedulers import ASHAScheduler
from sklearn.model_selection import train_test_split
import xgboost as xgb
@@ -8,7 +9,8 @@ from ray import tune
from ray.tune.integration.xgboost import TuneReportCheckpointCallback
def train_breast_cancer(config):
def train_breast_cancer(config: dict):
# This is a simple training function to be passed into Tune
# Load dataset
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
# Split into train and test set
@@ -17,7 +19,7 @@ def train_breast_cancer(config):
# Build input matrices for XGBoost
train_set = xgb.DMatrix(train_x, label=train_y)
test_set = xgb.DMatrix(test_x, label=test_y)
# Train the classifier
# Train the classifier, using the Tune callback
xgb.train(
config,
train_set,
@@ -27,7 +29,8 @@ def train_breast_cancer(config):
if __name__ == "__main__":
config = {
search_space = {
# You can mix constants with search space objects.
"objective": "binary:logistic",
"eval_metric": ["logloss", "error"],
"max_depth": tune.randint(1, 9),
@@ -35,6 +38,7 @@ if __name__ == "__main__":
"subsample": tune.uniform(0.5, 1.0),
"eta": tune.loguniform(1e-4, 1e-1)
}
# This will enable aggressive early stopping of bad trials.
scheduler = ASHAScheduler(
max_t=10, # 10 training iterations
grace_period=1,
@@ -44,13 +48,13 @@ if __name__ == "__main__":
train_breast_cancer,
metric="eval-logloss",
mode="min",
resources_per_trial={"cpu": 1}, # You can add "gpu": 0.1 here
config=config,
# You can add "gpu": 0.1 to allocate GPUs
resources_per_trial={"cpu": 1},
config=search_space,
num_samples=10,
scheduler=scheduler)
# Load the best model checkpoint
import os
best_bst = xgb.Booster()
best_bst.load_model(os.path.join(analysis.best_checkpoint, "model.xgb"))
accuracy = 1. - analysis.best_result["eval-error"]
+15 -18
View File
@@ -1,10 +1,9 @@
"""This test checks that ZOOptSearch is functional.
"""This example demonstrates the usage of ZOOptSearch.
It also checks that it is usable with a separate scheduler.
"""
import time
import ray
from ray import tune
from ray.tune.suggest.zoopt import ZOOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
@@ -12,6 +11,7 @@ from zoopt import ValueType # noqa: F401
def evaluation_fn(step, width, height):
    """Return the synthetic loss for one training step.

    Sleeps for 0.1 seconds, then evaluates
    ``(0.1 + width * step / 100) ** -1 + height * 0.1``.

    Args:
        step: Index of the current training iteration.
        width: Hyperparameter multiplying ``step`` in the denominator term.
        height: Hyperparameter contributing a constant ``0.1 * height``
            offset to the result.

    Returns:
        The loss value computed for this step.
    """
    time.sleep(0.1)
    return (0.1 + width * step / 100)**(-1) + height * 0.1
@@ -24,7 +24,6 @@ def easy_objective(config):
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
time.sleep(0.1)
if __name__ == "__main__":
@@ -34,16 +33,8 @@ if __name__ == "__main__":
parser.add_argument(
"--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()
tune_kwargs = {
"num_samples": 10 if args.smoke_test else 1000,
"config": {
"steps": 10,
"height": tune.quniform(-10, 10, 1e-2),
"width": tune.randint(0, 10)
}
}
num_samples = 10 if args.smoke_test else 1000
# Optional: Pass the parameter space yourself
# space = {
@@ -61,17 +52,23 @@ if __name__ == "__main__":
zoopt_search = ZOOptSearch(
algo="Asracos", # only support ASRacos currently
budget=tune_kwargs["num_samples"],
budget=num_samples,
# dim_dict=space, # If you want to set the space yourself
metric="mean_loss",
mode="min",
**zoopt_search_config)
scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
scheduler = AsyncHyperBandScheduler()
tune.run(
analysis = tune.run(
easy_objective,
metric="mean_loss",
mode="min",
search_alg=zoopt_search,
name="zoopt_search",
scheduler=scheduler,
**tune_kwargs)
num_samples=num_samples,
config={
"steps": 10,
"height": tune.quniform(-10, 10, 1e-2),
"width": tune.randint(0, 10)
})
print("Best config found: ", analysis.best_config)
@@ -2,7 +2,7 @@ import os
import argparse
from ray.tune import run
from ray.tune.examples.async_hyperband_example import MyTrainableClass
from ray.tune.utils._mock_trainable import MyTrainableClass
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.suggest.suggestion import ConcurrencyLimiter
+5 -1
View File
@@ -25,6 +25,7 @@ from ray.tune.syncer import CloudSyncer, SyncerCallback, get_node_syncer
from ray.tune.utils.trainable import TrainableUtil
from ray.tune.trial import Trial
from ray.tune.trial_runner import TrialRunner
from ray.tune.utils._mock_trainable import MyTrainableClass
from ray.tune.utils.mock import (MockDurableTrainer, MockRemoteTrainer,
MockNodeSyncer, mock_storage_client,
MOCK_REMOTE_DIR)
@@ -746,7 +747,6 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
cluster = start_connected_cluster
dirpath = str(tmpdir)
local_checkpoint_dir = os.path.join(dirpath, "experiment")
from ray.tune.examples.async_hyperband_example import MyTrainableClass
from ray.tune import register_trainable
register_trainable("trainable", MyTrainableClass)
@@ -770,6 +770,8 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
if trials and len(trials) >= 10:
break
time.sleep(.5)
else:
raise ValueError(f"Didn't generate enough trials: {len(trials)}")
if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
raise RuntimeError(
@@ -792,8 +794,10 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
runner = TrialRunner(
resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
trials = runner.get_trials()
if len(trials) == 0:
continue # nonblocking script hasn't resumed yet, wait
reached = True
assert len(trials) >= 10
assert len(trials) <= 20
@@ -8,7 +8,7 @@ from numpy import nan
import ray
from ray import tune
from ray.tune.examples.async_hyperband_example import MyTrainableClass
from ray.tune.utils.mock import MyTrainableClass
class ExperimentAnalysisSuite(unittest.TestCase):
@@ -11,7 +11,7 @@ import numpy as np
import ray
from ray.tune import (run, Trainable, sample_from, Analysis,
ExperimentAnalysis, grid_search)
from ray.tune.examples.async_hyperband_example import MyTrainableClass
from ray.tune.utils.mock import MyTrainableClass
class ExperimentAnalysisInMemorySuite(unittest.TestCase):
+1 -1
View File
@@ -116,7 +116,7 @@ class TuneExampleTest(unittest.TestCase):
validate_save_restore(MyTrainableClass, use_object_store=True)
def testAsyncHyperbandExample(self):
from ray.tune.examples.async_hyperband_example import MyTrainableClass
from ray.tune.utils.mock import MyTrainableClass
validate_save_restore(MyTrainableClass)
validate_save_restore(MyTrainableClass, use_object_store=True)
+34
View File
@@ -0,0 +1,34 @@
import os
import json
import numpy as np
from ray.tune import Trainable
class MyTrainableClass(Trainable):
    """Example agent whose learning curve is a scaled ``tanh`` of the timestep.

    The dummy hyperparameters "width" and "height" determine the slope and
    maximum reward value reached.
    """

    def setup(self, config):
        """Start counting training iterations from zero."""
        self.timestep = 0

    def step(self):
        """Advance one iteration and report the current reward.

        Returns:
            dict: ``{"episode_reward_mean": v}`` where
            ``v = height * tanh(timestep / width)``. Both hyperparameters
            are read from ``self.config`` and default to 1 when absent.
        """
        self.timestep += 1
        v = np.tanh(float(self.timestep) / self.config.get("width", 1))
        v *= self.config.get("height", 1)

        # Here we use `episode_reward_mean`, but you can also report other
        # objectives such as loss or accuracy.
        return {"episode_reward_mean": v}

    def save_checkpoint(self, checkpoint_dir):
        """Write the current timestep as JSON to ``<checkpoint_dir>/checkpoint``.

        Returns:
            str: Path of the checkpoint file that was written.
        """
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            json.dump({"timestep": self.timestep}, f)
        return path

    def load_checkpoint(self, checkpoint_path):
        """Restore the timestep from a file written by ``save_checkpoint``."""
        with open(checkpoint_path) as f:
            self.timestep = json.load(f)["timestep"]
+33 -1
View File
@@ -1,9 +1,11 @@
import os
import numpy as np
import json
import ray.utils
from ray.rllib.agents.mock import _MockTrainer
from ray.tune import DurableTrainable
from ray.tune import DurableTrainable, Trainable
from ray.tune.sync_client import get_sync_client
from ray.tune.syncer import NodeSyncer
@@ -56,3 +58,33 @@ class MockDurableTrainer(DurableTrainable, _MockTrainer):
def _create_storage_client(self):
return mock_storage_client()
class MyTrainableClass(Trainable):
    """Example agent whose learning curve is a scaled ``tanh`` of the timestep.

    The dummy hyperparameters "width" and "height" determine the slope and
    maximum reward value reached.
    """

    def setup(self, config):
        """Start counting training iterations from zero."""
        self.timestep = 0

    def step(self):
        """Advance one iteration and report the current reward.

        Returns:
            dict: ``{"episode_reward_mean": v}`` where
            ``v = height * tanh(timestep / width)``. Both hyperparameters
            are read from ``self.config`` and default to 1 when absent.
        """
        self.timestep += 1
        v = np.tanh(float(self.timestep) / self.config.get("width", 1))
        v *= self.config.get("height", 1)

        # Here we use `episode_reward_mean`, but you can also report other
        # objectives such as loss or accuracy.
        return {"episode_reward_mean": v}

    def save_checkpoint(self, checkpoint_dir):
        """Write the current timestep as JSON to ``<checkpoint_dir>/checkpoint``.

        Returns:
            str: Path of the checkpoint file that was written.
        """
        path = os.path.join(checkpoint_dir, "checkpoint")
        with open(path, "w") as f:
            json.dump({"timestep": self.timestep}, f)
        return path

    def load_checkpoint(self, checkpoint_path):
        """Restore the timestep from a file written by ``save_checkpoint``."""
        with open(checkpoint_path) as f:
            self.timestep = json.load(f)["timestep"]