[tune] Fix up examples (#9201)

2026-07-02 01:55:25 +08:00 · 2020-07-05 01:16:20 -07:00
parent f7ac495a68
commit b71c912da7
22 changed files with 380 additions and 243 deletions
@@ -3,9 +3,10 @@
 It also checks that it is usable with a separate scheduler.
 """
 import numpy as np
+import time

 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.ax import AxSearch

@@ -33,12 +34,10 @@ def hartmann6(x):
    return y


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
+def easy_objective(config):
    for i in range(config["iterations"]):
        x = np.array([config.get("x{}".format(i + 1)) for i in range(6)])
-        reporter(
+        tune.report(
            timesteps_total=i,
            hartmann6=hartmann6(x),
            l2norm=np.sqrt((x**2).sum()))
@@ -109,7 +108,8 @@ if __name__ == "__main__":
    )
    algo = AxSearch(client, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="ax",
        search_alg=algo,
        scheduler=scheduler,
@@ -2,20 +2,28 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.bayesopt import BayesOptSearch


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -32,10 +40,7 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
-            "iterations": 100,
-        },
-        "stop": {
-            "timesteps_total": 100
+            "steps": 100,
        }
    }
    algo = BayesOptSearch(
@@ -48,7 +53,8 @@ if __name__ == "__main__":
            "xi": 0.0
        })
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="my_exp",
        search_alg=algo,
        scheduler=scheduler,
@@ -6,16 +6,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import numpy as np
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.dragonfly import DragonflySearch


-def objective(config, reporter):
-    import numpy as np
-    import time
-    time.sleep(0.2)
+def objective(config):
    for i in range(config["iterations"]):
        vol1 = config["point"][0]  # LiNO3
        vol2 = config["point"][1]  # Li2SO4
@@ -25,7 +25,7 @@ def objective(config, reporter):
        conductivity = vol1 + 0.1 * (vol2 + vol3)**2 + 2.3 * vol4 * (vol1**1.5)
        # Add Gaussian noise to simulate experimental noise
        conductivity += np.random.normal() * 0.01
-        reporter(timesteps_total=i, objective=conductivity)
+        tune.report(timesteps_total=i, objective=conductivity)
        time.sleep(0.02)


@@ -46,9 +46,6 @@ if __name__ == "__main__":
        "config": {
            "iterations": 100,
        },
-        "stop": {
-            "timesteps_total": 100
-        },
    }

    domain_vars = [{
@@ -75,7 +72,8 @@ if __name__ == "__main__":
    optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
    algo = DragonflySearch(optimizer, metric="objective", mode="max")
    scheduler = AsyncHyperBandScheduler(metric="objective", mode="max")
-    run(objective,
+    tune.run(
+        objective,
        name="dragonfly_search",
        search_alg=algo,
        scheduler=scheduler,
@@ -3,7 +3,7 @@
 It also checks that it is usable with a separate scheduler.
 """
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.automl import GeneticSearch
 from ray.tune.automl import ContinuousSpace, DiscreteSpace, SearchSpace
@@ -20,7 +20,7 @@ def michalewicz_function(config, reporter):
    y = np.dot(sin_x, sin_z)

    # Negate y since we want to minimize y value
-    reporter(timesteps_total=1, neg_mean_loss=-y)
+    tune.report(timesteps_total=1, neg_mean_loss=-y)


 if __name__ == "__main__":
@@ -47,7 +47,8 @@ if __name__ == "__main__":
        max_generation=2 if args.smoke_test else 10,
        population_size=10 if args.smoke_test else 50)
    scheduler = AsyncHyperBandScheduler(metric="neg_mean_loss", mode="max")
-    run(michalewicz_function,
+    tune.run(
+        michalewicz_function,
        name="my_exp",
        search_alg=algo,
        scheduler=scheduler,
@@ -2,22 +2,28 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.hyperopt import HyperOptSearch


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    assert type(config["activation"]) == str, \
-        "Config is incorrect: {}".format(type(config["activation"]))
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -33,6 +39,7 @@ if __name__ == "__main__":
    space = {
        "width": hp.uniform("width", 0, 20),
        "height": hp.uniform("height", -100, 100),
+        # This is an ignored parameter.
        "activation": hp.choice("activation", ["relu", "tanh"])
    }

@@ -52,11 +59,8 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
-            "iterations": 100,
-        },
-        "stop": {
-            "timesteps_total": 100
-        },
+            "steps": 100,
+        }
    }
    algo = HyperOptSearch(
        space,
@@ -64,4 +68,4 @@ if __name__ == "__main__":
        mode="min",
        points_to_evaluate=current_best_params)
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
+    tune.run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
@@ -15,13 +15,20 @@ from ray import tune
 from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS


+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
 def easy_objective(config):
-    for i in range(20):
-        result = dict(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        tune.report(**result)
-        time.sleep(0.02)
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config.get("steps", 100)):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -2,20 +2,28 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.nevergrad import NevergradSearch


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -31,10 +39,7 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 50,
        "config": {
-            "iterations": 100,
-        },
-        "stop": {
-            "timesteps_total": 100
+            "steps": 100,
        }
    }
    instrumentation = 2
@@ -49,7 +54,8 @@ if __name__ == "__main__":
    algo = NevergradSearch(
        optimizer, parameter_names, metric="mean_loss", mode="min")
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="nevergrad",
        search_alg=algo,
        scheduler=scheduler,
@@ -2,20 +2,28 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.sigopt import SigOptSearch


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluate(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.01
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluate(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -53,11 +61,8 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
-            "iterations": 100,
-        },
-        "stop": {
-            "timesteps_total": 100
-        },
+            "steps": 10
+        }
    }
    algo = SigOptSearch(
        space,
@@ -66,7 +71,8 @@ if __name__ == "__main__":
        metric="mean_loss",
        mode="min")
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="my_exp",
        search_alg=algo,
        scheduler=scheduler,
@@ -2,20 +2,28 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.skopt import SkOptSearch


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -31,11 +39,8 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 50,
        "config": {
-            "iterations": 100,
-        },
-        "stop": {
-            "timesteps_total": 100
-        },
+            "steps": 100,
+        }
    }
    optimizer = Optimizer([(0, 20), (-100, 100)])
    previously_run_params = [[10, 0], [15, -20]]
@@ -47,7 +52,8 @@ if __name__ == "__main__":
        points_to_evaluate=previously_run_params,
        evaluated_rewards=known_rewards)
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="skopt_exp_with_warmstart",
        search_alg=algo,
        scheduler=scheduler,
@@ -61,7 +67,8 @@ if __name__ == "__main__":
        mode="min",
        points_to_evaluate=previously_run_params)
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
-    run(easy_objective,
+    tune.run(
+        easy_objective,
        name="skopt_exp",
        search_alg=algo,
        scheduler=scheduler,
@@ -13,6 +13,8 @@ from mxnet.gluon.data.vision import transforms
 from gluoncv.model_zoo import get_model
 from gluoncv.data import transforms as gcv_transforms

+from ray import tune
+
 # Training settings
 parser = argparse.ArgumentParser(description="CIFAR-10 Example")
 parser.add_argument(
@@ -86,7 +88,8 @@ parser.add_argument(
 args = parser.parse_args()


-def train_cifar10(args, config, reporter):
+def train_cifar10(config):
+    args = config.pop("args")
    vars(args).update(config)
    np.random.seed(args.seed)
    random.seed(args.seed)
@@ -172,18 +175,18 @@ def train_cifar10(args, config, reporter):

        _, test_acc = metric.get()
        test_loss /= len(test_data)
-        reporter(mean_loss=test_loss, mean_accuracy=test_acc)
+        return test_loss, test_acc

    for epoch in range(1, args.epochs + 1):
        train(epoch)
-        test()
+        test_loss, test_acc = test()
+        tune.report(mean_loss=test_loss, mean_accuracy=test_acc)


 if __name__ == "__main__":
    args = parser.parse_args()

    import ray
-    from ray import tune
    from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler

    ray.init()
@@ -198,11 +201,8 @@ if __name__ == "__main__":
            grace_period=60)
    else:
        raise NotImplementedError
-    tune.register_trainable(
-        "TRAIN_FN",
-        lambda config, reporter: train_cifar10(args, config, reporter))
    tune.run(
-        "TRAIN_FN",
+        train_cifar10,
        name=args.expname,
        verbose=2,
        scheduler=sched,
@@ -216,6 +216,7 @@ if __name__ == "__main__":
        },
        num_samples=1 if args.smoke_test else args.num_samples,
        config={
+            "args": args,
            "lr": tune.sample_from(
                lambda spec: np.power(10.0, np.random.uniform(-4, -1))),
            "momentum": tune.sample_from(
@@ -2,21 +2,29 @@

 It also checks that it is usable with a separate scheduler.
 """
+import time
+
 import ray
-from ray.tune import run
+from ray import tune
 from ray.tune.suggest.zoopt import ZOOptSearch
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from zoopt import ValueType


-def easy_objective(config, reporter):
-    import time
-    time.sleep(0.2)
-    for i in range(config["iterations"]):
-        reporter(
-            timesteps_total=i,
-            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
-        time.sleep(0.02)
+def evaluation_fn(step, width, height):
+    return (0.1 + width * step / 100)**(-1) + height * 0.1
+
+
+def easy_objective(config):
+    # Hyperparameters
+    width, height = config["width"], config["height"]
+
+    for step in range(config["steps"]):
+        # Iterative training function - can be any arbitrary training procedure
+        intermediate_score = evaluation_fn(step, width, height)
+        # Feed the score back to Tune.
+        tune.report(iterations=step, mean_loss=intermediate_score)
+        time.sleep(0.1)


 if __name__ == "__main__":
@@ -40,10 +48,7 @@ if __name__ == "__main__":
    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
-            "iterations": 10,  # evaluation times
-        },
-        "stop": {
-            "timesteps_total": 10  # cumstom stop rules
+            "steps": 10,  # evaluation times
        }
    }

@@ -56,7 +61,8 @@ if __name__ == "__main__":

    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")

-    run(easy_objective,
+    tune.run(
+        easy_objective,
        search_alg=zoopt_search,
        name="zoopt_search",
        scheduler=scheduler,
@@ -11,25 +11,31 @@
 # ray.init(address=args.address)

 # __quick_start_begin__
-import torch.optim as optim
 from ray import tune
-from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test


-def train_mnist(config):
-    train_loader, test_loader = get_data_loaders()
-    model = ConvNet()
-    optimizer = optim.SGD(model.parameters(), lr=config["lr"])
-    for i in range(10):
-        train(model, optimizer, train_loader)
-        acc = test(model, test_loader)
-        tune.report(mean_accuracy=acc)
+def objective(step, alpha, beta):
+    return (0.1 + alpha * step / 100)**(-1) + beta * 0.1
+
+
+def training_function(config):
+    # Hyperparameters
+    alpha, beta = config["alpha"], config["beta"]
+    for step in range(10):
+        # Iterative training function - can be any arbitrary training procedure.
+        intermediate_score = objective(step, alpha, beta)
+        # Feed the score back to Tune.
+        tune.report(mean_loss=intermediate_score)


 analysis = tune.run(
-    train_mnist, config={"lr": tune.grid_search([0.001, 0.01, 0.1])})
+    training_function,
+    config={
+        "alpha": tune.grid_search([0.001, 0.01, 0.1]),
+        "beta": tune.choice([1, 2, 3])
+    })

-print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))
+print("Best config: ", analysis.get_best_config(metric="mean_loss"))

 # Get a dataframe for analyzing trial results.
 df = analysis.dataframe()
@@ -6,29 +6,106 @@
 import numpy as np
 import torch
 import torch.optim as optim
-from torchvision import datasets
+import torch.nn as nn
+from torchvision import datasets, transforms
+from torch.utils.data import DataLoader
+import torch.nn.functional as F

 from ray import tune
 from ray.tune.schedulers import ASHAScheduler
-from ray.tune.examples.mnist_pytorch import get_data_loaders, ConvNet, train, test
 # __tutorial_imports_end__
 # yapf: enable


 # yapf: disable
+# __model_def_begin__
+class ConvNet(nn.Module):
+    def __init__(self):
+        super(ConvNet, self).__init__()
+        # In this example, we don't change the model architecture
+        # for simplicity.
+        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
+        self.fc = nn.Linear(192, 10)
+
+    def forward(self, x):
+        x = F.relu(F.max_pool2d(self.conv1(x), 3))
+        x = x.view(-1, 192)
+        x = self.fc(x)
+        return F.log_softmax(x, dim=1)
+# __model_def_end__
+# yapf: enable
+
+# yapf: disable
+# __train_def_begin__
+
+# Change these values if you want the training to run quicker or slower.
+EPOCH_SIZE = 512
+TEST_SIZE = 256
+
+def train(model, optimizer, train_loader):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        # We set this just for the example to run quickly.
+        if batch_idx * len(data) > EPOCH_SIZE:
+            return
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = F.nll_loss(output, target)
+        loss.backward()
+        optimizer.step()
+
+
+def test(model, data_loader):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.eval()
+    correct = 0
+    total = 0
+    with torch.no_grad():
+        for batch_idx, (data, target) in enumerate(data_loader):
+            # We set this just for the example to run quickly.
+            if batch_idx * len(data) > TEST_SIZE:
+                break
+            data, target = data.to(device), target.to(device)
+            outputs = model(data)
+            _, predicted = torch.max(outputs.data, 1)
+            total += target.size(0)
+            correct += (predicted == target).sum().item()
+
+    return correct / total
+# __train_def_end__
+
+
 # __train_func_begin__
 def train_mnist(config):
+    # Data Setup
+    mnist_transforms = transforms.Compose(
+        [transforms.ToTensor(),
+         transforms.Normalize((0.1307, ), (0.3081, ))])
+
+    train_loader = DataLoader(
+        datasets.MNIST("~/data", train=True, download=True, transform=mnist_transforms),
+        batch_size=64,
+        shuffle=True)
+    test_loader = DataLoader(
+        datasets.MNIST("~/data", train=False, transform=mnist_transforms),
+        batch_size=64,
+        shuffle=True)
+
    model = ConvNet()
-    train_loader, test_loader = get_data_loaders()
    optimizer = optim.SGD(
        model.parameters(), lr=config["lr"], momentum=config["momentum"])
    for i in range(10):
        train(model, optimizer, train_loader)
        acc = test(model, test_loader)
+
+        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)
+
        if i % 5 == 0:
            # This saves the model to the trial directory
-            torch.save(model, "./model.pth")
+            torch.save(model.state_dict(), "./model.pth")
 # __train_func_end__
 # yapf: enable

@@ -39,7 +116,10 @@ search_space = {
 }

 # Uncomment this to enable distributed execution
-# `ray.init(address=...)`
+# `ray.init(address="auto")`
+
+# Download the dataset first
+datasets.MNIST("~/data", train=True, download=True)

 analysis = tune.run(train_mnist, config=search_space)
 # __eval_func_end__
@@ -52,7 +132,7 @@ dfs = analysis.trial_dataframes
 # __run_scheduler_begin__
 analysis = tune.run(
    train_mnist,
-    num_samples=30,
+    num_samples=20,
    scheduler=ASHAScheduler(metric="mean_accuracy", mode="max"),
    config=search_space)

@@ -88,7 +168,10 @@ import os

 df = analysis.dataframe()
 logdir = analysis.get_best_logdir("mean_accuracy", mode="max")
-model = torch.load(os.path.join(logdir, "model.pth"))
+state_dict = torch.load(os.path.join(logdir, "model.pth"))
+
+model = ConvNet()
+model.load_state_dict(state_dict)
 # __run_analysis_end__

 from ray.tune.examples.mnist_pytorch_trainable import TrainMNIST