mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 12:28:10 +08:00
[tune] refactor and add examples (#11931)
This commit is contained in:
@@ -1,45 +1,27 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray.tune import Trainable, run, sample_from
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
|
||||
|
||||
class MyTrainableClass(Trainable):
|
||||
"""Example agent whose learning curve is a random sigmoid.
|
||||
def evaluation_fn(step, width, height):
|
||||
time.sleep(0.1)
|
||||
return (0.1 + width * step / 100)**(-1) + height * 0.1
|
||||
|
||||
The dummy hyperparameters "width" and "height" determine the slope and
|
||||
maximum reward value reached.
|
||||
"""
|
||||
|
||||
def setup(self, config):
|
||||
self.timestep = 0
|
||||
def easy_objective(config):
|
||||
# Hyperparameters
|
||||
width, height = config["width"], config["height"]
|
||||
|
||||
def step(self):
|
||||
self.timestep += 1
|
||||
v = np.tanh(float(self.timestep) / self.config.get("width", 1))
|
||||
v *= self.config.get("height", 1)
|
||||
|
||||
# Here we use `episode_reward_mean`, but you can also report other
|
||||
# objectives such as loss or accuracy.
|
||||
return {"episode_reward_mean": v}
|
||||
|
||||
def save_checkpoint(self, checkpoint_dir):
|
||||
path = os.path.join(checkpoint_dir, "checkpoint")
|
||||
with open(path, "w") as f:
|
||||
f.write(json.dumps({"timestep": self.timestep}))
|
||||
return path
|
||||
|
||||
def load_checkpoint(self, checkpoint_path):
|
||||
with open(checkpoint_path) as f:
|
||||
self.timestep = json.loads(f.read())["timestep"]
|
||||
for step in range(config["steps"]):
|
||||
# Iterative training function - can be an arbitrary training procedure
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
# Feed the score back to Tune.
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -48,31 +30,33 @@ if __name__ == "__main__":
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
parser.add_argument(
|
||||
"--ray-address",
|
||||
help="Address of Ray cluster for seamless distributed execution.")
|
||||
help="Address of Ray cluster for seamless distributed execution.",
|
||||
required=False)
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init(address=args.ray_address)
|
||||
|
||||
# asynchronous hyperband early stopping, configured with
|
||||
# `episode_reward_mean` as the
|
||||
# objective and `training_iteration` as the time unit,
|
||||
# which is automatically filled by Tune.
|
||||
ahb = AsyncHyperBandScheduler(
|
||||
time_attr="training_iteration",
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
grace_period=5,
|
||||
max_t=100)
|
||||
# AsyncHyperBand enables aggressive early stopping of bad trials.
|
||||
scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100)
|
||||
|
||||
run(MyTrainableClass,
|
||||
# 'training_iteration' is incremented every time `trainable.step` is called
|
||||
stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999}
|
||||
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="asynchyperband_test",
|
||||
scheduler=ahb,
|
||||
stop={"training_iteration": 1 if args.smoke_test else 99999},
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
scheduler=scheduler,
|
||||
stop=stopping_criteria,
|
||||
num_samples=20,
|
||||
verbose=1,
|
||||
resources_per_trial={
|
||||
"cpu": 1,
|
||||
"gpu": 0
|
||||
},
|
||||
config={
|
||||
"width": sample_from(lambda spec: 10 + int(90 * random.random())),
|
||||
"height": sample_from(lambda spec: int(100 * random.random())),
|
||||
config={ # Hyperparameter space
|
||||
"steps": 100,
|
||||
"width": tune.uniform(10, 100),
|
||||
"height": tune.uniform(0, 100),
|
||||
})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
"""This test checks that AxSearch is functional.
|
||||
"""This example demonstrates the usage of AxSearch with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
from ray.tune.suggest.ax import AxSearch
|
||||
@@ -52,11 +51,21 @@ if __name__ == "__main__":
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
ray.init()
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 50,
|
||||
"config": {
|
||||
algo = AxSearch(
|
||||
max_concurrent=4,
|
||||
parameter_constraints=["x1 + x2 <= 2.0"], # Optional.
|
||||
outcome_constraints=["l2norm <= 1.25"], # Optional.
|
||||
)
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="ax",
|
||||
metric="hartmann6", # provided in the 'easy_objective' function
|
||||
mode="min",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
num_samples=10 if args.smoke_test else 50,
|
||||
config={
|
||||
"iterations": 100,
|
||||
"x1": tune.uniform(0.0, 1.0),
|
||||
"x2": tune.uniform(0.0, 1.0),
|
||||
@@ -65,21 +74,6 @@ if __name__ == "__main__":
|
||||
"x5": tune.uniform(0.0, 1.0),
|
||||
"x6": tune.uniform(0.0, 1.0),
|
||||
},
|
||||
"stop": {
|
||||
"timesteps_total": 100
|
||||
}
|
||||
}
|
||||
algo = AxSearch(
|
||||
max_concurrent=4,
|
||||
metric="hartmann6",
|
||||
mode="min",
|
||||
parameter_constraints=["x1 + x2 <= 2.0"], # Optional.
|
||||
outcome_constraints=["l2norm <= 1.25"], # Optional.
|
||||
)
|
||||
scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="min")
|
||||
tune.run(
|
||||
easy_objective,
|
||||
name="ax",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
stop={"timesteps_total": 100})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""This test checks that BayesOpt is functional.
|
||||
"""This example demonstrates the usage of BayesOpt with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
from ray.tune.suggest import ConcurrencyLimiter
|
||||
@@ -34,16 +33,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 1000,
|
||||
"config": {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100)
|
||||
}
|
||||
}
|
||||
algo = BayesOptSearch(utility_kwargs={
|
||||
"kind": "ucb",
|
||||
"kappa": 2.5,
|
||||
@@ -51,11 +41,18 @@ if __name__ == "__main__":
|
||||
})
|
||||
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="my_exp",
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 1000,
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100)
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -63,24 +63,22 @@ if __name__ == "__main__":
|
||||
# CS.CategoricalHyperparameter(
|
||||
# "activation", choices=["relu", "tanh"]))
|
||||
|
||||
experiment_metrics = dict(metric="episode_reward_mean", mode="max")
|
||||
|
||||
bohb_hyperband = HyperBandForBOHB(
|
||||
time_attr="training_iteration",
|
||||
max_t=100,
|
||||
reduction_factor=4,
|
||||
**experiment_metrics)
|
||||
time_attr="training_iteration", max_t=100, reduction_factor=4)
|
||||
|
||||
bohb_search = TuneBOHB(
|
||||
# space=config_space, # If you want to set the space manually
|
||||
max_concurrent=4,
|
||||
**experiment_metrics)
|
||||
max_concurrent=4)
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
MyTrainableClass,
|
||||
name="bohb_test",
|
||||
config=config,
|
||||
scheduler=bohb_hyperband,
|
||||
search_alg=bohb_search,
|
||||
num_samples=10,
|
||||
stop={"training_iteration": 100})
|
||||
stop={"training_iteration": 100},
|
||||
metric="episode_reward_mean",
|
||||
mode="max")
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -14,7 +14,6 @@ import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune import CLIReporter
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
# __import_end__
|
||||
|
||||
@@ -187,21 +186,18 @@ def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
|
||||
"batch_size": tune.choice([2, 4, 8, 16])
|
||||
}
|
||||
scheduler = ASHAScheduler(
|
||||
metric="loss",
|
||||
mode="min",
|
||||
max_t=max_num_epochs,
|
||||
grace_period=1,
|
||||
reduction_factor=2)
|
||||
reporter = CLIReporter(
|
||||
# parameter_columns=["l1", "l2", "lr", "batch_size"],
|
||||
metric_columns=["loss", "accuracy", "training_iteration"])
|
||||
result = tune.run(
|
||||
partial(train_cifar, data_dir=data_dir),
|
||||
tune.with_parameters(train_cifar, data_dir=data_dir),
|
||||
resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
|
||||
config=config,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
num_samples=num_samples,
|
||||
scheduler=scheduler,
|
||||
progress_reporter=reporter)
|
||||
scheduler=scheduler
|
||||
)
|
||||
|
||||
best_trial = result.get_best_trial("loss", "min", "last")
|
||||
print("Best trial config: {}".format(best_trial.config))
|
||||
|
||||
@@ -72,4 +72,11 @@ if __name__ == "__main__":
|
||||
ray.init(**options)
|
||||
trainable_cls = DistributedTrainableCreator(
|
||||
train_mnist, num_workers=args.num_workers, use_gpu=args.use_gpu)
|
||||
tune.run(trainable_cls, num_samples=4, stop={"training_iteration": 10})
|
||||
analysis = tune.run(
|
||||
trainable_cls,
|
||||
num_samples=4,
|
||||
stop={"training_iteration": 10},
|
||||
metric="mean_accuracy",
|
||||
mode="max")
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""This test checks that Dragonfly is functional.
|
||||
"""This example demonstrates the usage of Dragonfly with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
@@ -9,7 +9,6 @@ from __future__ import print_function
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.suggest import ConcurrencyLimiter
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
@@ -37,17 +36,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 50,
|
||||
"config": {
|
||||
"iterations": 100,
|
||||
"LiNO3_vol": tune.uniform(0, 7),
|
||||
"Li2SO4_vol": tune.uniform(0, 7),
|
||||
"NaClO4_vol": tune.uniform(0, 7)
|
||||
},
|
||||
}
|
||||
|
||||
# Optional: Pass the parameter space yourself
|
||||
# space = [{
|
||||
@@ -75,11 +63,20 @@ if __name__ == "__main__":
|
||||
df_search = ConcurrencyLimiter(df_search, max_concurrent=4)
|
||||
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
objective,
|
||||
metric="objective",
|
||||
mode="max",
|
||||
name="dragonfly_search",
|
||||
search_alg=df_search,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 50,
|
||||
config={
|
||||
"iterations": 100,
|
||||
"LiNO3_vol": tune.uniform(0, 7),
|
||||
"Li2SO4_vol": tune.uniform(0, 7),
|
||||
"NaClO4_vol": tune.uniform(0, 7)
|
||||
},
|
||||
)
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
"""This test checks that GeneticSearch is functional.
|
||||
"""This example demonstrates the usage of GeneticSearch with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
from ray.tune.automl import GeneticSearch
|
||||
@@ -30,7 +29,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
space = SearchSpace({
|
||||
ContinuousSpace("x1", 0, 4, 100),
|
||||
@@ -40,16 +38,19 @@ if __name__ == "__main__":
|
||||
DiscreteSpace("x5", [-1, 0, 1, 2, 3]),
|
||||
})
|
||||
|
||||
config = {"stop": {"training_iteration": 100}}
|
||||
algo = GeneticSearch(
|
||||
space,
|
||||
reward_attr="neg_mean_loss",
|
||||
max_generation=2 if args.smoke_test else 10,
|
||||
population_size=10 if args.smoke_test else 50)
|
||||
scheduler = AsyncHyperBandScheduler(metric="neg_mean_loss", mode="max")
|
||||
tune.run(
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
analysis = tune.run(
|
||||
michalewicz_function,
|
||||
metric="neg_mean_loss",
|
||||
mode="max",
|
||||
name="my_exp",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**config)
|
||||
stop={"training_iteration": 100})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -112,7 +112,9 @@ if __name__ == "__main__":
|
||||
replicate_pem=False)
|
||||
analysis = tune.run(
|
||||
horovod_trainable,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config={"lr": tune.uniform(0.1, 1)},
|
||||
num_samples=2 if args.smoke_test else 10,
|
||||
fail_fast=True)
|
||||
config = analysis.get_best_config(metric="loss", mode="min")
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -51,16 +51,14 @@ if __name__ == "__main__":
|
||||
# Hyperband early stopping, configured with `episode_reward_mean` as the
|
||||
# objective and `training_iteration` as the time unit,
|
||||
# which is automatically filled by Tune.
|
||||
hyperband = HyperBandScheduler(
|
||||
time_attr="training_iteration",
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
max_t=200)
|
||||
hyperband = HyperBandScheduler(time_attr="training_iteration", max_t=200)
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
MyTrainableClass,
|
||||
name="hyperband_test",
|
||||
num_samples=20,
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
stop={"training_iteration": 1 if args.smoke_test else 99999},
|
||||
config={
|
||||
"width": tune.randint(10, 90),
|
||||
@@ -68,3 +66,5 @@ if __name__ == "__main__":
|
||||
},
|
||||
scheduler=hyperband,
|
||||
fail_fast=True)
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -21,6 +21,8 @@ def train(config, checkpoint_dir=None):
|
||||
v = np.tanh(float(timestep) / config.get("width", 1))
|
||||
v *= config.get("height", 1)
|
||||
|
||||
# Checkpoint the state of the training every 3 steps
|
||||
# Note that this is only required for certain schedulers
|
||||
if timestep % 3 == 0:
|
||||
with tune.checkpoint_dir(step=timestep) as checkpoint_dir:
|
||||
path = os.path.join(checkpoint_dir, "checkpoint")
|
||||
@@ -42,17 +44,16 @@ if __name__ == "__main__":
|
||||
# Hyperband early stopping, configured with `episode_reward_mean` as the
|
||||
# objective and `training_iteration` as the time unit,
|
||||
# which is automatically filled by Tune.
|
||||
hyperband = HyperBandScheduler(
|
||||
time_attr="training_iteration",
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
max_t=200)
|
||||
hyperband = HyperBandScheduler(max_t=200)
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
train,
|
||||
name="hyperband_test",
|
||||
num_samples=20,
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
stop={"training_iteration": 10 if args.smoke_test else 99999},
|
||||
config={"height": tune.uniform(0, 100)},
|
||||
scheduler=hyperband,
|
||||
fail_fast=True)
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""This test checks that HyperOpt is functional.
|
||||
"""This example demonstrates the usage of HyperOpt with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
@@ -12,6 +12,7 @@ from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
|
||||
|
||||
def evaluation_fn(step, width, height):
|
||||
time.sleep(0.1)
|
||||
return (0.1 + width * step / 100)**(-1) + height * 0.1
|
||||
|
||||
|
||||
@@ -24,7 +25,6 @@ def easy_objective(config):
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
# Feed the score back to Tune.
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -49,24 +49,23 @@ if __name__ == "__main__":
|
||||
}
|
||||
]
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 1000,
|
||||
"config": {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
# This is an ignored parameter.
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
}
|
||||
}
|
||||
algo = HyperOptSearch(points_to_evaluate=current_best_params)
|
||||
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
||||
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 1000,
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
# This is an ignored parameter.
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -5,6 +5,7 @@ import sklearn.metrics
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
|
||||
|
||||
def LightGBMCallback(env):
|
||||
@@ -41,11 +42,13 @@ if __name__ == "__main__":
|
||||
"num_leaves": tune.randint(10, 1000),
|
||||
"learning_rate": tune.loguniform(1e-8, 1e-1)
|
||||
}
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
tune.run(
|
||||
|
||||
analysis = tune.run(
|
||||
train_breast_cancer,
|
||||
metric="binary_error",
|
||||
mode="min",
|
||||
config=config,
|
||||
num_samples=2,
|
||||
scheduler=ASHAScheduler())
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,12 +1,9 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
from ray import tune
|
||||
from ray.tune import Trainable, run
|
||||
|
||||
|
||||
class TestLogger(tune.logger.Logger):
|
||||
@@ -18,34 +15,20 @@ def trial_str_creator(trial):
|
||||
return "{}_{}_123".format(trial.trainable_name, trial.trial_id)
|
||||
|
||||
|
||||
class MyTrainableClass(Trainable):
|
||||
"""Example agent whose learning curve is a random sigmoid.
|
||||
def evaluation_fn(step, width, height):
|
||||
time.sleep(0.1)
|
||||
return (0.1 + width * step / 100)**(-1) + height * 0.1
|
||||
|
||||
The dummy hyperparameters "width" and "height" determine the slope and
|
||||
maximum reward value reached.
|
||||
"""
|
||||
|
||||
def setup(self, config):
|
||||
self.timestep = 0
|
||||
def easy_objective(config):
|
||||
# Hyperparameters
|
||||
width, height = config["width"], config["height"]
|
||||
|
||||
def step(self):
|
||||
self.timestep += 1
|
||||
v = np.tanh(float(self.timestep) / self.config.get("width", 1))
|
||||
v *= self.config.get("height", 1)
|
||||
|
||||
# Here we use `episode_reward_mean`, but you can also report other
|
||||
# objectives such as loss or accuracy.
|
||||
return {"episode_reward_mean": v}
|
||||
|
||||
def save_checkpoint(self, checkpoint_dir):
|
||||
path = os.path.join(checkpoint_dir, "checkpoint")
|
||||
with open(path, "w") as f:
|
||||
f.write(json.dumps({"timestep": self.timestep}))
|
||||
return path
|
||||
|
||||
def load_checkpoint(self, checkpoint_path):
|
||||
with open(checkpoint_path) as f:
|
||||
self.timestep = json.loads(f.read())["timestep"]
|
||||
for step in range(config["steps"]):
|
||||
# Iterative training function - can be any arbitrary training procedure
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
# Feed the score back to Tune.
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -54,14 +37,18 @@ if __name__ == "__main__":
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
trials = run(
|
||||
MyTrainableClass,
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="hyperband_test",
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
num_samples=5,
|
||||
trial_name_creator=trial_str_creator,
|
||||
loggers=[TestLogger],
|
||||
stop={"training_iteration": 1 if args.smoke_test else 99999},
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.randint(10, 100),
|
||||
"height": tune.loguniform(10, 100)
|
||||
})
|
||||
print("Best hyperparameters: ", analysis.best_config)
|
||||
|
||||
@@ -90,7 +90,7 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
data_dir=data_dir,
|
||||
num_epochs=num_epochs,
|
||||
num_gpus=gpus_per_trial)
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
trainable,
|
||||
resources_per_trial={
|
||||
"cpu": 1,
|
||||
@@ -102,6 +102,8 @@ def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
num_samples=num_samples,
|
||||
name="tune_mnist")
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
@@ -93,7 +93,6 @@ class LightningMNISTClassifier(pl.LightningModule):
|
||||
self.log("ptl/val_loss", avg_loss)
|
||||
self.log("ptl/val_accuracy", avg_acc)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def download_data(data_dir):
|
||||
transform = transforms.Compose([
|
||||
@@ -177,7 +176,8 @@ def train_mnist_tune_checkpoint(config,
|
||||
ckpt = pl_load(
|
||||
os.path.join(checkpoint_dir, "checkpoint"),
|
||||
map_location=lambda storage, loc: storage)
|
||||
model = LightningMNISTClassifier._load_model_state(ckpt, config=config, data_dir=data_dir)
|
||||
model = LightningMNISTClassifier._load_model_state(
|
||||
ckpt, config=config, data_dir=data_dir)
|
||||
trainer.current_epoch = ckpt["epoch"]
|
||||
else:
|
||||
model = LightningMNISTClassifier(config=config, data_dir=data_dir)
|
||||
@@ -199,8 +199,6 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
}
|
||||
|
||||
scheduler = ASHAScheduler(
|
||||
metric="loss",
|
||||
mode="min",
|
||||
max_t=num_epochs,
|
||||
grace_period=1,
|
||||
reduction_factor=2)
|
||||
@@ -209,7 +207,7 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
|
||||
metric_columns=["loss", "mean_accuracy", "training_iteration"])
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
tune.with_parameters(
|
||||
train_mnist_tune,
|
||||
data_dir=data_dir,
|
||||
@@ -219,12 +217,16 @@ def tune_mnist_asha(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
"cpu": 1,
|
||||
"gpu": gpus_per_trial
|
||||
},
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config=config,
|
||||
num_samples=num_samples,
|
||||
scheduler=scheduler,
|
||||
progress_reporter=reporter,
|
||||
name="tune_mnist_asha")
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
shutil.rmtree(data_dir)
|
||||
# __tune_asha_end__
|
||||
|
||||
@@ -242,9 +244,6 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
}
|
||||
|
||||
scheduler = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="loss",
|
||||
mode="min",
|
||||
perturbation_interval=4,
|
||||
hyperparam_mutations={
|
||||
"lr": tune.loguniform(1e-4, 1e-1),
|
||||
@@ -255,7 +254,7 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
|
||||
metric_columns=["loss", "mean_accuracy", "training_iteration"])
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
tune.with_parameters(
|
||||
train_mnist_tune_checkpoint,
|
||||
data_dir=data_dir,
|
||||
@@ -265,12 +264,16 @@ def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
|
||||
"cpu": 1,
|
||||
"gpu": gpus_per_trial
|
||||
},
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config=config,
|
||||
num_samples=num_samples,
|
||||
scheduler=scheduler,
|
||||
progress_reporter=reporter,
|
||||
name="tune_mnist_pbt")
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
shutil.rmtree(data_dir)
|
||||
# __tune_pbt_end__
|
||||
|
||||
|
||||
@@ -88,5 +88,4 @@ if __name__ == "__main__":
|
||||
"momentum": tune.uniform(0.1, 0.9),
|
||||
})
|
||||
|
||||
print("Best config is:",
|
||||
analysis.get_best_config(metric="mean_accuracy", mode="max"))
|
||||
print("Best config is:", analysis.best_config)
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from functools import partial
|
||||
|
||||
import mxnet as mx
|
||||
|
||||
from ray import tune, logger
|
||||
from ray.tune import CLIReporter
|
||||
from ray.tune.integration.mxnet import TuneCheckpointCallback, \
|
||||
TuneReportCallback
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
@@ -59,25 +57,21 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10):
|
||||
}
|
||||
|
||||
scheduler = ASHAScheduler(
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
max_t=num_epochs,
|
||||
grace_period=1,
|
||||
reduction_factor=2)
|
||||
max_t=num_epochs, grace_period=1, reduction_factor=2)
|
||||
|
||||
reporter = CLIReporter(
|
||||
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"])
|
||||
|
||||
tune.run(
|
||||
partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
|
||||
analysis = tune.run(
|
||||
tune.with_parameters(
|
||||
train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
|
||||
resources_per_trial={
|
||||
"cpu": 1,
|
||||
},
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
config=config,
|
||||
num_samples=num_samples,
|
||||
scheduler=scheduler,
|
||||
progress_reporter=reporter,
|
||||
name="tune_mnist_mxnet")
|
||||
return analysis
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -89,6 +83,8 @@ if __name__ == "__main__":
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
if args.smoke_test:
|
||||
tune_mnist_mxnet(num_samples=1, num_epochs=1)
|
||||
analysis = tune_mnist_mxnet(num_samples=1, num_epochs=1)
|
||||
else:
|
||||
tune_mnist_mxnet(num_samples=10, num_epochs=10)
|
||||
analysis = tune_mnist_mxnet(num_samples=10, num_epochs=10)
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""This test checks that Nevergrad is functional.
|
||||
"""This example demonstrates the usage of Nevergrad with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.suggest import ConcurrencyLimiter
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
@@ -35,18 +34,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
# The config will be automatically converted to Nevergrad's search space
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 50,
|
||||
"config": {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
}
|
||||
}
|
||||
|
||||
# Optional: Pass the parameter space yourself
|
||||
# space = ng.p.Dict(
|
||||
@@ -63,11 +50,19 @@ if __name__ == "__main__":
|
||||
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
name="nevergrad",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 50,
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
"""This test checks that Optuna is functional.
|
||||
"""This example demonstrates the usage of Optuna with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
@@ -36,23 +36,22 @@ if __name__ == "__main__":
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init(configure_logging=False)
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 100,
|
||||
"config": {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
# This is an ignored parameter.
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
}
|
||||
}
|
||||
algo = OptunaSearch()
|
||||
algo = ConcurrencyLimiter(algo, max_concurrent=4)
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 100,
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
# This is an ignored parameter.
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -18,19 +18,18 @@ if __name__ == "__main__":
|
||||
ray.init()
|
||||
|
||||
pbt = PB2(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=20,
|
||||
hyperparam_bounds={
|
||||
# hyperparameter bounds.
|
||||
"lr": [0.0001, 0.02],
|
||||
})
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
pbt_function,
|
||||
name="pbt_test",
|
||||
scheduler=pbt,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
verbose=False,
|
||||
stop={
|
||||
"training_iteration": 30,
|
||||
@@ -43,3 +42,5 @@ if __name__ == "__main__":
|
||||
# the model training in this example
|
||||
"some_other_factor": 1,
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -4,7 +4,6 @@ import argparse
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
import ray
|
||||
from ray.tune import run, sample_from
|
||||
from ray.tune.schedulers import PopulationBasedTraining
|
||||
from ray.tune.schedulers.pb2 import PB2
|
||||
@@ -46,7 +45,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--save_csv", type=bool, default=False)
|
||||
|
||||
args = parser.parse_args()
|
||||
ray.init()
|
||||
|
||||
# bipedalwalker needs 1600
|
||||
if args.env_name in ["BipedalWalker-v2", "BipedalWalker-v3"]:
|
||||
|
||||
@@ -86,8 +86,6 @@ if __name__ == "__main__":
|
||||
# __pbt_begin__
|
||||
scheduler = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=5,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -118,6 +116,8 @@ if __name__ == "__main__":
|
||||
name="pbt_test",
|
||||
scheduler=scheduler,
|
||||
reuse_actors=True,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
verbose=1,
|
||||
stop=stopper,
|
||||
export_formats=[ExportFormat.MODEL],
|
||||
@@ -131,9 +131,8 @@ if __name__ == "__main__":
|
||||
})
|
||||
# __tune_end__
|
||||
|
||||
best_trial = analysis.get_best_trial("mean_accuracy", "max")
|
||||
best_checkpoint = analysis.get_best_checkpoint(
|
||||
best_trial, metric="mean_accuracy", mode="max")
|
||||
best_trial = analysis.best_trial
|
||||
best_checkpoint = analysis.best_checkpoint
|
||||
restored_trainable = PytorchTrainable()
|
||||
restored_trainable.restore(best_checkpoint)
|
||||
best_model = restored_trainable.model
|
||||
|
||||
@@ -10,7 +10,6 @@ from torchvision import datasets
|
||||
from ray.tune.examples.mnist_pytorch import train, test, ConvNet,\
|
||||
get_data_loaders
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import PopulationBasedTraining
|
||||
from ray.tune.trial import ExportFormat
|
||||
@@ -66,14 +65,11 @@ if __name__ == "__main__":
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
ray.init()
|
||||
datasets.MNIST("~/data", train=True, download=True)
|
||||
|
||||
# __pbt_begin__
|
||||
scheduler = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=5,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -104,6 +100,8 @@ if __name__ == "__main__":
|
||||
train_convnet,
|
||||
name="pbt_test",
|
||||
scheduler=scheduler,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
verbose=1,
|
||||
stop=stopper,
|
||||
export_formats=[ExportFormat.MODEL],
|
||||
@@ -116,9 +114,8 @@ if __name__ == "__main__":
|
||||
})
|
||||
# __tune_end__
|
||||
|
||||
best_trial = analysis.get_best_trial("mean_accuracy", mode="max")
|
||||
best_checkpoint_path = analysis.get_best_checkpoint(
|
||||
best_trial, metric="mean_accuracy", mode="max")
|
||||
best_trial = analysis.best_trial
|
||||
best_checkpoint_path = analysis.best_checkpoint
|
||||
best_model = ConvNet()
|
||||
best_checkpoint = torch.load(
|
||||
os.path.join(best_checkpoint_path, "checkpoint"))
|
||||
|
||||
@@ -9,7 +9,6 @@ from ray.tune.schedulers import PopulationBasedTraining
|
||||
import argparse
|
||||
import os
|
||||
from filelock import FileLock
|
||||
import random
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.parallel
|
||||
@@ -105,9 +104,6 @@ if __name__ == "__main__":
|
||||
mnist_model_ref = ray.put(mnist_cnn)
|
||||
|
||||
scheduler = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="is_score",
|
||||
mode="max",
|
||||
perturbation_interval=5,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -124,12 +120,12 @@ if __name__ == "__main__":
|
||||
stop={
|
||||
"training_iteration": tune_iter,
|
||||
},
|
||||
metric="is_score",
|
||||
mode="max",
|
||||
num_samples=8,
|
||||
config={
|
||||
"netG_lr": tune.sample_from(
|
||||
lambda spec: random.choice([0.0001, 0.0002, 0.0005])),
|
||||
"netD_lr": tune.sample_from(
|
||||
lambda spec: random.choice([0.0001, 0.0002, 0.0005])),
|
||||
"netG_lr": tune.choice([0.0001, 0.0002, 0.0005]),
|
||||
"netD_lr": tune.choice([0.0001, 0.0002, 0.0005]),
|
||||
"mnist_model_ref": mnist_model_ref
|
||||
})
|
||||
# __tune_end__
|
||||
|
||||
@@ -127,8 +127,6 @@ if __name__ == "__main__":
|
||||
# __tune_begin__
|
||||
scheduler = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="is_score",
|
||||
mode="max",
|
||||
perturbation_interval=5,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -143,6 +141,8 @@ if __name__ == "__main__":
|
||||
scheduler=scheduler,
|
||||
reuse_actors=True,
|
||||
verbose=1,
|
||||
metric="is_score",
|
||||
mode="max",
|
||||
checkpoint_at_end=True,
|
||||
stop={
|
||||
"training_iteration": tune_iter,
|
||||
|
||||
@@ -5,11 +5,11 @@ import argparse
|
||||
import random
|
||||
|
||||
import ray
|
||||
from ray.tune import Trainable, run
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import PopulationBasedTraining
|
||||
|
||||
|
||||
class PBTBenchmarkExample(Trainable):
|
||||
class PBTBenchmarkExample(tune.Trainable):
|
||||
"""Toy PBT problem for benchmarking adaptive learning rate.
|
||||
|
||||
The goal is to optimize this trainable's accuracy. The accuracy increases
|
||||
@@ -93,8 +93,6 @@ if __name__ == "__main__":
|
||||
|
||||
pbt = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=20,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -103,10 +101,12 @@ if __name__ == "__main__":
|
||||
"some_other_factor": [1, 2],
|
||||
})
|
||||
|
||||
run(
|
||||
analysis = tune.run(
|
||||
PBTBenchmarkExample,
|
||||
name="pbt_test",
|
||||
scheduler=pbt,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
reuse_actors=True,
|
||||
checkpoint_freq=20,
|
||||
verbose=False,
|
||||
@@ -120,3 +120,5 @@ if __name__ == "__main__":
|
||||
# the model training in this example
|
||||
"some_other_factor": 1,
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -75,7 +75,8 @@ def pbt_function(config, checkpoint_dir=None):
|
||||
cur_lr=lr,
|
||||
optimal_lr=optimal_lr, # for debugging
|
||||
q_err=q_err, # for debugging
|
||||
done=accuracy > midpoint * 2)
|
||||
done=accuracy > midpoint * 2 # this stops the training process
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -90,8 +91,6 @@ if __name__ == "__main__":
|
||||
|
||||
pbt = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=4,
|
||||
hyperparam_mutations={
|
||||
# distribution for resampling
|
||||
@@ -100,11 +99,13 @@ if __name__ == "__main__":
|
||||
"some_other_factor": [1, 2],
|
||||
})
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
pbt_function,
|
||||
name="pbt_test",
|
||||
scheduler=pbt,
|
||||
verbose=False,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
stop={
|
||||
"training_iteration": 30,
|
||||
},
|
||||
@@ -116,3 +117,5 @@ if __name__ == "__main__":
|
||||
# the model training in this example
|
||||
"some_other_factor": 1,
|
||||
})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -268,9 +268,6 @@ if __name__ == "__main__":
|
||||
read_data()
|
||||
|
||||
pbt = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=2,
|
||||
hyperparam_mutations={
|
||||
"dropout": lambda: np.random.uniform(0, 1),
|
||||
@@ -282,6 +279,8 @@ if __name__ == "__main__":
|
||||
MemNNModel,
|
||||
name="pbt_babi_memnn",
|
||||
scheduler=pbt,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
stop={"training_iteration": 4 if args.smoke_test else 100},
|
||||
num_samples=2,
|
||||
config={
|
||||
|
||||
@@ -11,8 +11,7 @@ computationally demanding example.
|
||||
|
||||
import random
|
||||
|
||||
import ray
|
||||
from ray.tune import run, sample_from
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import PopulationBasedTraining
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -29,8 +28,6 @@ if __name__ == "__main__":
|
||||
|
||||
pbt = PopulationBasedTraining(
|
||||
time_attr="time_total_s",
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
perturbation_interval=120,
|
||||
resample_probability=0.25,
|
||||
# Specifies the mutations of these hyperparams
|
||||
@@ -44,12 +41,13 @@ if __name__ == "__main__":
|
||||
},
|
||||
custom_explore_fn=explore)
|
||||
|
||||
ray.init()
|
||||
run(
|
||||
analysis = tune.run(
|
||||
"PPO",
|
||||
name="pbt_humanoid_test",
|
||||
scheduler=pbt,
|
||||
num_samples=8,
|
||||
metric="episode_reward_mean",
|
||||
mode="max",
|
||||
config={
|
||||
"env": "Humanoid-v1",
|
||||
"kl_coeff": 1.0,
|
||||
@@ -63,10 +61,9 @@ if __name__ == "__main__":
|
||||
"clip_param": 0.2,
|
||||
"lr": 1e-4,
|
||||
# These params start off randomly drawn from a set.
|
||||
"num_sgd_iter": sample_from(
|
||||
lambda spec: random.choice([10, 20, 30])),
|
||||
"sgd_minibatch_size": sample_from(
|
||||
lambda spec: random.choice([128, 512, 2048])),
|
||||
"train_batch_size": sample_from(
|
||||
lambda spec: random.choice([10000, 20000, 40000]))
|
||||
"num_sgd_iter": tune.choice([10, 20, 30]),
|
||||
"sgd_minibatch_size": tune.choice([128, 512, 2048]),
|
||||
"train_batch_size": tune.choice([10000, 20000, 40000])
|
||||
})
|
||||
|
||||
print("best hyperparameters: ", analysis.best_config)
|
||||
|
||||
@@ -22,8 +22,7 @@ from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D
|
||||
from tensorflow.python.keras.models import Model, load_model
|
||||
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
|
||||
|
||||
import ray
|
||||
from ray.tune import grid_search, run, sample_from
|
||||
from ray import tune
|
||||
from ray.tune import Trainable
|
||||
from ray.tune.schedulers import PopulationBasedTraining
|
||||
|
||||
@@ -184,38 +183,39 @@ if __name__ == "__main__":
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
|
||||
train_spec = {
|
||||
"resources_per_trial": {
|
||||
"cpu": 1,
|
||||
"gpu": 1
|
||||
},
|
||||
"stop": {
|
||||
"mean_accuracy": 0.80,
|
||||
"training_iteration": 30,
|
||||
},
|
||||
"config": {
|
||||
"epochs": 1,
|
||||
"batch_size": 64,
|
||||
"lr": grid_search([10**-4, 10**-5]),
|
||||
"decay": sample_from(lambda spec: spec.config.lr / 100.0),
|
||||
"dropout": grid_search([0.25, 0.5]),
|
||||
},
|
||||
"num_samples": 4,
|
||||
space = {
|
||||
"epochs": 1,
|
||||
"batch_size": 64,
|
||||
"lr": tune.grid_search([10**-4, 10**-5]),
|
||||
"decay": tune.sample_from(lambda spec: spec.config.lr / 100.0),
|
||||
"dropout": tune.grid_search([0.25, 0.5]),
|
||||
}
|
||||
|
||||
if args.smoke_test:
|
||||
train_spec["config"]["lr"] = 10**-4
|
||||
train_spec["config"]["dropout"] = 0.5
|
||||
|
||||
ray.init()
|
||||
space["lr"] = 10**-4
|
||||
space["dropout"] = 0.5
|
||||
|
||||
pbt = PopulationBasedTraining(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
perturbation_interval=10,
|
||||
hyperparam_mutations={
|
||||
"dropout": lambda _: np.random.uniform(0, 1),
|
||||
})
|
||||
|
||||
run(Cifar10Model, name="pbt_cifar10", scheduler=pbt, **train_spec)
|
||||
analysis = tune.run(
|
||||
Cifar10Model,
|
||||
name="pbt_cifar10",
|
||||
scheduler=pbt,
|
||||
resources_per_trial={
|
||||
"cpu": 1,
|
||||
"gpu": 1
|
||||
},
|
||||
stop={
|
||||
"mean_accuracy": 0.80,
|
||||
"training_iteration": 30,
|
||||
},
|
||||
config=space,
|
||||
num_samples=4,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
)
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""This test checks that SigOpt is functional.
|
||||
"""This example demonstrates the usage of SigOpt with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
from ray.tune.suggest.sigopt import SigOptSearch
|
||||
@@ -37,7 +36,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
space = [
|
||||
{
|
||||
@@ -57,13 +55,6 @@ if __name__ == "__main__":
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
config = {
|
||||
"num_samples": 10 if args.smoke_test else 1000,
|
||||
"config": {
|
||||
"steps": 10
|
||||
}
|
||||
}
|
||||
algo = SigOptSearch(
|
||||
space,
|
||||
name="SigOpt Example Experiment",
|
||||
@@ -71,9 +62,12 @@ if __name__ == "__main__":
|
||||
metric="mean_loss",
|
||||
mode="min")
|
||||
scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="my_exp",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**config)
|
||||
num_samples=10 if args.smoke_test else 1000,
|
||||
config={"steps": 10})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,13 +1,9 @@
|
||||
"""This test checks that SigOpt is functional.
|
||||
"""Example using SigOpt's multi-objective functionality."""
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
import numpy as np
|
||||
from ray import tune
|
||||
from ray.tune.schedulers import FIFOScheduler
|
||||
from ray.tune.suggest.sigopt import SigOptSearch
|
||||
|
||||
np.random.seed(0)
|
||||
@@ -41,7 +37,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
space = [
|
||||
{
|
||||
@@ -54,13 +49,6 @@ if __name__ == "__main__":
|
||||
},
|
||||
]
|
||||
|
||||
config = {
|
||||
"num_samples": 10 if args.smoke_test else 1000,
|
||||
"config": {
|
||||
"total_weight": 1
|
||||
}
|
||||
}
|
||||
|
||||
algo = SigOptSearch(
|
||||
space,
|
||||
name="SigOpt Example Multi Objective Experiment",
|
||||
@@ -69,11 +57,10 @@ if __name__ == "__main__":
|
||||
metric=["average", "std", "sharpe"],
|
||||
mode=["max", "min", "obs"])
|
||||
|
||||
scheduler = FIFOScheduler()
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="my_exp",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**config)
|
||||
num_samples=10 if args.smoke_test else 1000,
|
||||
config={"total_weight": 1})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
"""This test checks that SigOpt is functional.
|
||||
"""Example using SigOpt's support for prior beliefs."""
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
import numpy as np
|
||||
from ray import tune
|
||||
|
||||
from ray.tune.schedulers import FIFOScheduler
|
||||
from ray.tune.suggest.sigopt import SigOptSearch
|
||||
|
||||
np.random.seed(0)
|
||||
@@ -36,7 +30,6 @@ def easy_objective(config):
|
||||
|
||||
average, std = evaluate(w1, w2, w3)
|
||||
tune.report(average=average, std=std)
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -51,8 +44,6 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
samples = 10 if args.smoke_test else 1000
|
||||
|
||||
conn = Connection(client_token=os.environ["SIGOPT_KEY"])
|
||||
@@ -90,8 +81,6 @@ if __name__ == "__main__":
|
||||
observation_budget=samples,
|
||||
parallel_bandwidth=1)
|
||||
|
||||
config = {"num_samples": samples, "config": {}}
|
||||
|
||||
algo = SigOptSearch(
|
||||
connection=conn,
|
||||
experiment_id=experiment.id,
|
||||
@@ -100,11 +89,10 @@ if __name__ == "__main__":
|
||||
metric=["average", "std"],
|
||||
mode=["obs", "min"])
|
||||
|
||||
scheduler = FIFOScheduler()
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
name="my_exp",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**config)
|
||||
num_samples=samples,
|
||||
config={})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""This test checks that Skopt is functional.
|
||||
"""This example demonstrates the usage of Skopt with Ray Tune.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.suggest import ConcurrencyLimiter
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
@@ -12,6 +11,7 @@ from ray.tune.suggest.skopt import SkOptSearch
|
||||
|
||||
|
||||
def evaluation_fn(step, width, height):
|
||||
time.sleep(0.1)
|
||||
return (0.1 + width * step / 100)**(-1) + height * 0.1
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ def easy_objective(config):
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
# Feed the score back to Tune.
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -34,18 +33,8 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
# The config will be automatically converted to SkOpt's search space
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 50,
|
||||
"config": {
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
}
|
||||
}
|
||||
|
||||
# Optional: Pass the parameter space yourself
|
||||
# space = {
|
||||
@@ -66,11 +55,18 @@ if __name__ == "__main__":
|
||||
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
name="skopt_exp_with_warmstart",
|
||||
search_alg=algo,
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=10 if args.smoke_test else 50,
|
||||
config={
|
||||
"steps": 100,
|
||||
"width": tune.uniform(0, 20),
|
||||
"height": tune.uniform(-100, 100),
|
||||
"activation": tune.choice(["relu", "tanh"])
|
||||
})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -86,16 +86,15 @@ if __name__ == "__main__":
|
||||
use_gpu=args.use_gpu,
|
||||
num_workers=2,
|
||||
)
|
||||
sched = AsyncHyperBandScheduler(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
max_t=400,
|
||||
grace_period=20)
|
||||
tune.run(
|
||||
|
||||
sched = AsyncHyperBandScheduler(max_t=400, grace_period=20)
|
||||
|
||||
analysis = tune.run(
|
||||
tf_trainable,
|
||||
name="exp",
|
||||
scheduler=sched,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
stop={
|
||||
"mean_accuracy": 0.99,
|
||||
"training_iteration": 10
|
||||
@@ -108,3 +107,4 @@ if __name__ == "__main__":
|
||||
"hidden": tune.sample_from(
|
||||
lambda spec: np.random.randint(32, 512)),
|
||||
})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -116,8 +116,12 @@ class MNISTTrainable(tune.Trainable):
|
||||
|
||||
if __name__ == "__main__":
|
||||
load_data() # we download data on the driver to avoid race conditions.
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
MNISTTrainable,
|
||||
metric="test_loss",
|
||||
mode="min",
|
||||
stop={"training_iteration": 5 if args.smoke_test else 50},
|
||||
verbose=1,
|
||||
config={"hiddens": tune.grid_search([32, 64, 128])})
|
||||
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -13,6 +13,7 @@ from mxnet.gluon.data.vision import transforms
|
||||
from gluoncv.model_zoo import get_model
|
||||
from gluoncv.data import transforms as gcv_transforms
|
||||
|
||||
from ray.tune.schedulers import create_scheduler
|
||||
from ray import tune
|
||||
|
||||
# Training settings
|
||||
@@ -185,19 +186,9 @@ def train_cifar10(config):
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
sched = create_scheduler(args.scheduler)
|
||||
|
||||
import ray
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler
|
||||
|
||||
ray.init()
|
||||
if args.scheduler == "fifo":
|
||||
sched = FIFOScheduler()
|
||||
elif args.scheduler == "asynchyperband":
|
||||
sched = AsyncHyperBandScheduler(
|
||||
metric="mean_loss", mode="min", max_t=400, grace_period=60)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
train_cifar10,
|
||||
name=args.expname,
|
||||
verbose=2,
|
||||
@@ -213,8 +204,7 @@ if __name__ == "__main__":
|
||||
num_samples=1 if args.smoke_test else args.num_samples,
|
||||
config={
|
||||
"args": args,
|
||||
"lr": tune.sample_from(
|
||||
lambda spec: np.power(10.0, np.random.uniform(-4, -1))),
|
||||
"momentum": tune.sample_from(
|
||||
lambda spec: np.random.uniform(0.85, 0.95)),
|
||||
"lr": tune.loguniform(1e-4, 1e-1),
|
||||
"momentum": tune.uniform(0.85, 0.95),
|
||||
})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import argparse
|
||||
import numpy as np
|
||||
from tensorflow.keras.datasets import mnist
|
||||
|
||||
from ray.tune.integration.keras import TuneReportCallback
|
||||
@@ -52,16 +51,14 @@ if __name__ == "__main__":
|
||||
|
||||
ray.init(num_cpus=4 if args.smoke_test else None)
|
||||
sched = AsyncHyperBandScheduler(
|
||||
time_attr="training_iteration",
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
max_t=400,
|
||||
grace_period=20)
|
||||
time_attr="training_iteration", max_t=400, grace_period=20)
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
train_mnist,
|
||||
name="exp",
|
||||
scheduler=sched,
|
||||
metric="mean_accuracy",
|
||||
mode="max",
|
||||
stop={
|
||||
"mean_accuracy": 0.99,
|
||||
"training_iteration": 5 if args.smoke_test else 300
|
||||
@@ -73,9 +70,8 @@ if __name__ == "__main__":
|
||||
},
|
||||
config={
|
||||
"threads": 2,
|
||||
"lr": tune.sample_from(lambda spec: np.random.uniform(0.001, 0.1)),
|
||||
"momentum": tune.sample_from(
|
||||
lambda spec: np.random.uniform(0.1, 0.9)),
|
||||
"hidden": tune.sample_from(
|
||||
lambda spec: np.random.randint(32, 512)),
|
||||
"lr": tune.uniform(0.001, 0.1),
|
||||
"momentum": tune.uniform(0.1, 0.9),
|
||||
"hidden": tune.randint(32, 512),
|
||||
})
|
||||
print("Best hyperparameters found were: ", analysis.best_config)
|
||||
|
||||
@@ -20,8 +20,10 @@ def train_function(config, checkpoint_dir=None):
|
||||
|
||||
def tune_function(api_key_file):
|
||||
"""Example for using a WandbLogger with the function API"""
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
train_function,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config={
|
||||
"mean": tune.grid_search([1, 2, 3, 4, 5]),
|
||||
"sd": tune.uniform(0.2, 0.8),
|
||||
@@ -31,6 +33,7 @@ def tune_function(api_key_file):
|
||||
}
|
||||
},
|
||||
loggers=DEFAULT_LOGGERS + (WandbLogger, ))
|
||||
return analysis.best_config
|
||||
|
||||
|
||||
@wandb_mixin
|
||||
@@ -43,8 +46,10 @@ def decorated_train_function(config, checkpoint_dir=None):
|
||||
|
||||
def tune_decorated(api_key_file):
|
||||
"""Example for using the @wandb_mixin decorator with the function API"""
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
decorated_train_function,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config={
|
||||
"mean": tune.grid_search([1, 2, 3, 4, 5]),
|
||||
"sd": tune.uniform(0.2, 0.8),
|
||||
@@ -53,6 +58,7 @@ def tune_decorated(api_key_file):
|
||||
"project": "Wandb_example"
|
||||
}
|
||||
})
|
||||
return analysis.best_config
|
||||
|
||||
|
||||
class WandbTrainable(WandbTrainableMixin, Trainable):
|
||||
@@ -65,8 +71,10 @@ class WandbTrainable(WandbTrainableMixin, Trainable):
|
||||
|
||||
def tune_trainable(api_key_file):
|
||||
"""Example for using a WandbTrainableMixin with the class API"""
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
WandbTrainable,
|
||||
metric="loss",
|
||||
mode="min",
|
||||
config={
|
||||
"mean": tune.grid_search([1, 2, 3, 4, 5]),
|
||||
"sd": tune.uniform(0.2, 0.8),
|
||||
@@ -75,6 +83,7 @@ def tune_trainable(api_key_file):
|
||||
"project": "Wandb_example"
|
||||
}
|
||||
})
|
||||
return analysis.best_config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import sklearn.datasets
|
||||
import sklearn.metrics
|
||||
import os
|
||||
from ray.tune.schedulers import ASHAScheduler
|
||||
from sklearn.model_selection import train_test_split
|
||||
import xgboost as xgb
|
||||
@@ -8,7 +9,8 @@ from ray import tune
|
||||
from ray.tune.integration.xgboost import TuneReportCheckpointCallback
|
||||
|
||||
|
||||
def train_breast_cancer(config):
|
||||
def train_breast_cancer(config: dict):
|
||||
# This is a simple training function to be passed into Tune
|
||||
# Load dataset
|
||||
data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
|
||||
# Split into train and test set
|
||||
@@ -17,7 +19,7 @@ def train_breast_cancer(config):
|
||||
# Build input matrices for XGBoost
|
||||
train_set = xgb.DMatrix(train_x, label=train_y)
|
||||
test_set = xgb.DMatrix(test_x, label=test_y)
|
||||
# Train the classifier
|
||||
# Train the classifier, using the Tune callback
|
||||
xgb.train(
|
||||
config,
|
||||
train_set,
|
||||
@@ -27,7 +29,8 @@ def train_breast_cancer(config):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
config = {
|
||||
search_space = {
|
||||
# You can mix constants with search space objects.
|
||||
"objective": "binary:logistic",
|
||||
"eval_metric": ["logloss", "error"],
|
||||
"max_depth": tune.randint(1, 9),
|
||||
@@ -35,6 +38,7 @@ if __name__ == "__main__":
|
||||
"subsample": tune.uniform(0.5, 1.0),
|
||||
"eta": tune.loguniform(1e-4, 1e-1)
|
||||
}
|
||||
# This will enable aggressive early stopping of bad trials.
|
||||
scheduler = ASHAScheduler(
|
||||
max_t=10, # 10 training iterations
|
||||
grace_period=1,
|
||||
@@ -44,13 +48,13 @@ if __name__ == "__main__":
|
||||
train_breast_cancer,
|
||||
metric="eval-logloss",
|
||||
mode="min",
|
||||
resources_per_trial={"cpu": 1}, # You can add "gpu": 0.1 here
|
||||
config=config,
|
||||
# You can add "gpu": 0.1 to allocate GPUs
|
||||
resources_per_trial={"cpu": 1},
|
||||
config=search_space,
|
||||
num_samples=10,
|
||||
scheduler=scheduler)
|
||||
|
||||
# Load the best model checkpoint
|
||||
import os
|
||||
best_bst = xgb.Booster()
|
||||
best_bst.load_model(os.path.join(analysis.best_checkpoint, "model.xgb"))
|
||||
accuracy = 1. - analysis.best_result["eval-error"]
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""This test checks that ZOOptSearch is functional.
|
||||
"""This example demonstrates the usage of ZOOptSearch.
|
||||
|
||||
It also checks that it is usable with a separate scheduler.
|
||||
"""
|
||||
import time
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.suggest.zoopt import ZOOptSearch
|
||||
from ray.tune.schedulers import AsyncHyperBandScheduler
|
||||
@@ -12,6 +11,7 @@ from zoopt import ValueType # noqa: F401
|
||||
|
||||
|
||||
def evaluation_fn(step, width, height):
|
||||
time.sleep(0.1)
|
||||
return (0.1 + width * step / 100)**(-1) + height * 0.1
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ def easy_objective(config):
|
||||
intermediate_score = evaluation_fn(step, width, height)
|
||||
# Feed the score back to Tune.
|
||||
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||
time.sleep(0.1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -34,16 +33,8 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"--smoke-test", action="store_true", help="Finish quickly for testing")
|
||||
args, _ = parser.parse_known_args()
|
||||
ray.init()
|
||||
|
||||
tune_kwargs = {
|
||||
"num_samples": 10 if args.smoke_test else 1000,
|
||||
"config": {
|
||||
"steps": 10,
|
||||
"height": tune.quniform(-10, 10, 1e-2),
|
||||
"width": tune.randint(0, 10)
|
||||
}
|
||||
}
|
||||
num_samples = 10 if args.smoke_test else 1000
|
||||
|
||||
# Optional: Pass the parameter space yourself
|
||||
# space = {
|
||||
@@ -61,17 +52,23 @@ if __name__ == "__main__":
|
||||
|
||||
zoopt_search = ZOOptSearch(
|
||||
algo="Asracos", # only support ASRacos currently
|
||||
budget=tune_kwargs["num_samples"],
|
||||
budget=num_samples,
|
||||
# dim_dict=space, # If you want to set the space yourself
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
**zoopt_search_config)
|
||||
|
||||
scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
|
||||
scheduler = AsyncHyperBandScheduler()
|
||||
|
||||
tune.run(
|
||||
analysis = tune.run(
|
||||
easy_objective,
|
||||
metric="mean_loss",
|
||||
mode="min",
|
||||
search_alg=zoopt_search,
|
||||
name="zoopt_search",
|
||||
scheduler=scheduler,
|
||||
**tune_kwargs)
|
||||
num_samples=num_samples,
|
||||
config={
|
||||
"steps": 10,
|
||||
"height": tune.quniform(-10, 10, 1e-2),
|
||||
"width": tune.randint(0, 10)
|
||||
})
|
||||
print("Best config found: ", analysis.best_config)
|
||||
|
||||
@@ -2,7 +2,7 @@ import os
|
||||
import argparse
|
||||
|
||||
from ray.tune import run
|
||||
from ray.tune.examples.async_hyperband_example import MyTrainableClass
|
||||
from ray.tune.utils._mock_trainable import MyTrainableClass
|
||||
from ray.tune.suggest.hyperopt import HyperOptSearch
|
||||
from ray.tune.suggest.suggestion import ConcurrencyLimiter
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ from ray.tune.syncer import CloudSyncer, SyncerCallback, get_node_syncer
|
||||
from ray.tune.utils.trainable import TrainableUtil
|
||||
from ray.tune.trial import Trial
|
||||
from ray.tune.trial_runner import TrialRunner
|
||||
from ray.tune.utils._mock_trainable import MyTrainableClass
|
||||
from ray.tune.utils.mock import (MockDurableTrainer, MockRemoteTrainer,
|
||||
MockNodeSyncer, mock_storage_client,
|
||||
MOCK_REMOTE_DIR)
|
||||
@@ -746,7 +747,6 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
|
||||
cluster = start_connected_cluster
|
||||
dirpath = str(tmpdir)
|
||||
local_checkpoint_dir = os.path.join(dirpath, "experiment")
|
||||
from ray.tune.examples.async_hyperband_example import MyTrainableClass
|
||||
from ray.tune import register_trainable
|
||||
register_trainable("trainable", MyTrainableClass)
|
||||
|
||||
@@ -770,6 +770,8 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
|
||||
if trials and len(trials) >= 10:
|
||||
break
|
||||
time.sleep(.5)
|
||||
else:
|
||||
raise ValueError(f"Didn't generate enough trials: {len(trials)}")
|
||||
|
||||
if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
|
||||
raise RuntimeError(
|
||||
@@ -792,8 +794,10 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir):
|
||||
runner = TrialRunner(
|
||||
resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
|
||||
trials = runner.get_trials()
|
||||
|
||||
if len(trials) == 0:
|
||||
continue # nonblocking script hasn't resumed yet, wait
|
||||
|
||||
reached = True
|
||||
assert len(trials) >= 10
|
||||
assert len(trials) <= 20
|
||||
|
||||
@@ -8,7 +8,7 @@ from numpy import nan
|
||||
|
||||
import ray
|
||||
from ray import tune
|
||||
from ray.tune.examples.async_hyperband_example import MyTrainableClass
|
||||
from ray.tune.utils.mock import MyTrainableClass
|
||||
|
||||
|
||||
class ExperimentAnalysisSuite(unittest.TestCase):
|
||||
|
||||
@@ -11,7 +11,7 @@ import numpy as np
|
||||
import ray
|
||||
from ray.tune import (run, Trainable, sample_from, Analysis,
|
||||
ExperimentAnalysis, grid_search)
|
||||
from ray.tune.examples.async_hyperband_example import MyTrainableClass
|
||||
from ray.tune.utils.mock import MyTrainableClass
|
||||
|
||||
|
||||
class ExperimentAnalysisInMemorySuite(unittest.TestCase):
|
||||
|
||||
@@ -116,7 +116,7 @@ class TuneExampleTest(unittest.TestCase):
|
||||
validate_save_restore(MyTrainableClass, use_object_store=True)
|
||||
|
||||
def testAsyncHyperbandExample(self):
|
||||
from ray.tune.examples.async_hyperband_example import MyTrainableClass
|
||||
from ray.tune.utils.mock import MyTrainableClass
|
||||
validate_save_restore(MyTrainableClass)
|
||||
validate_save_restore(MyTrainableClass, use_object_store=True)
|
||||
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
import os
|
||||
import json
|
||||
import numpy as np
|
||||
from ray.tune import Trainable
|
||||
|
||||
|
||||
class MyTrainableClass(Trainable):
    """Mock trainable whose reward follows a scaled ``tanh`` curve.

    Every call to ``step`` advances an internal counter and reports
    ``tanh(timestep / width) * height``.  The dummy hyperparameters
    "width" and "height" are read from ``self.config`` (each defaults
    to 1): "width" sets the slope of the curve and "height" the maximum
    reward value it approaches.
    """

    def setup(self, config):
        """Start the step counter at zero; ``config`` is not read here."""
        self.timestep = 0

    def step(self):
        """Advance one timestep and report the resulting reward.

        Returns:
            dict: ``{"episode_reward_mean": reward}``.
        """
        self.timestep += 1
        progress = float(self.timestep) / self.config.get("width", 1)
        reward = np.tanh(progress) * self.config.get("height", 1)

        # `episode_reward_mean` is reported here, but other objectives
        # such as loss or accuracy could be returned the same way.
        return {"episode_reward_mean": reward}

    def save_checkpoint(self, checkpoint_dir):
        """Write the step counter as JSON to ``<checkpoint_dir>/checkpoint``.

        Returns:
            str: Path of the file that was written.
        """
        checkpoint_file = os.path.join(checkpoint_dir, "checkpoint")
        state = {"timestep": self.timestep}
        with open(checkpoint_file, "w") as out:
            json.dump(state, out)
        return checkpoint_file

    def load_checkpoint(self, checkpoint_path):
        """Reload the step counter saved by ``save_checkpoint``."""
        with open(checkpoint_path) as src:
            state = json.load(src)
        self.timestep = state["timestep"]
|
||||
@@ -1,9 +1,11 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
import ray.utils
|
||||
|
||||
from ray.rllib.agents.mock import _MockTrainer
|
||||
from ray.tune import DurableTrainable
|
||||
from ray.tune import DurableTrainable, Trainable
|
||||
from ray.tune.sync_client import get_sync_client
|
||||
from ray.tune.syncer import NodeSyncer
|
||||
|
||||
@@ -56,3 +58,33 @@ class MockDurableTrainer(DurableTrainable, _MockTrainer):
|
||||
|
||||
def _create_storage_client(self):
|
||||
return mock_storage_client()
|
||||
|
||||
|
||||
class MyTrainableClass(Trainable):
    """Mock trainable whose reward follows a scaled ``tanh`` curve.

    Every call to ``step`` advances an internal counter and reports
    ``tanh(timestep / width) * height``.  The dummy hyperparameters
    "width" and "height" are read from ``self.config`` (each defaults
    to 1): "width" sets the slope of the curve and "height" the maximum
    reward value it approaches.
    """

    def setup(self, config):
        """Start the step counter at zero; ``config`` is not read here."""
        self.timestep = 0

    def step(self):
        """Advance one timestep and report the resulting reward.

        Returns:
            dict: ``{"episode_reward_mean": reward}``.
        """
        self.timestep += 1
        progress = float(self.timestep) / self.config.get("width", 1)
        reward = np.tanh(progress) * self.config.get("height", 1)

        # `episode_reward_mean` is reported here, but other objectives
        # such as loss or accuracy could be returned the same way.
        return {"episode_reward_mean": reward}

    def save_checkpoint(self, checkpoint_dir):
        """Write the step counter as JSON to ``<checkpoint_dir>/checkpoint``.

        Returns:
            str: Path of the file that was written.
        """
        checkpoint_file = os.path.join(checkpoint_dir, "checkpoint")
        state = {"timestep": self.timestep}
        with open(checkpoint_file, "w") as out:
            json.dump(state, out)
        return checkpoint_file

    def load_checkpoint(self, checkpoint_path):
        """Reload the step counter saved by ``save_checkpoint``."""
        with open(checkpoint_path) as src:
            state = json.load(src)
        self.timestep = state["timestep"]
|
||||
|
||||
Reference in New Issue
Block a user