mirror of
https://github.com/wassname/ray.git
synced 2026-06-28 03:02:56 +08:00
197 lines
5.4 KiB
Python
197 lines
5.4 KiB
Python
# flake8: noqa
|
|
# Original Code: https://github.com/pytorch/examples/blob/master/mnist/main.py
|
|
|
|
# yapf: disable
|
|
# __tutorial_imports_begin__
|
|
import numpy as np
|
|
import torch
|
|
import torch.optim as optim
|
|
import torch.nn as nn
|
|
from torchvision import datasets, transforms
|
|
from torch.utils.data import DataLoader
|
|
import torch.nn.functional as F
|
|
|
|
from ray import tune
|
|
from ray.tune.schedulers import ASHAScheduler
|
|
# __tutorial_imports_end__
|
|
# yapf: enable
|
|
|
|
|
|
# yapf: disable
|
|
# __model_def_begin__
|
|
class ConvNet(nn.Module):
    """Minimal convolutional classifier used throughout this tutorial.

    Layers: a single 3x3 convolution (1 -> 3 channels), a 3x3 max-pool with
    ReLU, and one linear layer mapping 192 features to 10 outputs.
    The forward pass returns log-probabilities (log-softmax over dim 1).
    """

    def __init__(self):
        super().__init__()
        # The architecture is kept fixed for simplicity; only optimizer
        # hyperparameters are tuned in this example.
        self.conv1 = nn.Conv2d(1, 3, kernel_size=3)
        self.fc = nn.Linear(192, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the input batch ``x``."""
        pooled = F.max_pool2d(self.conv1(x), 3)
        activated = F.relu(pooled)
        # Flatten to the 192 features expected by ``self.fc``
        # (presumably 3 channels x 8 x 8 for 28x28 MNIST input -- confirm
        # before feeding other image sizes).
        flat = activated.view(-1, 192)
        logits = self.fc(flat)
        return F.log_softmax(logits, dim=1)
|
|
# __model_def_end__
|
|
# yapf: enable
|
|
|
|
# yapf: disable
|
|
# __train_def_begin__
|
|
|
|
# Sample budgets that cut each training / evaluation pass short so the
# example runs quickly. Raise them to train or evaluate on more data per
# iteration, lower them to make each iteration faster.
EPOCH_SIZE = 512
TEST_SIZE = 256
|
|
|
|
def train(model, optimizer, train_loader):
    """Run one truncated training pass of ``model`` over ``train_loader``.

    Each batch is moved to CUDA when available (CPU otherwise), the NLL loss
    of the model output is back-propagated and ``optimizer`` takes one step.
    The pass ends early once ``batch_idx * len(batch)`` exceeds
    ``EPOCH_SIZE``. Nothing is returned; ``model`` is updated in place.
    """
    target_device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    model.train()

    for step, (inputs, labels) in enumerate(train_loader):
        # Early exit keeps the example fast: stop after the sample budget.
        samples_before_batch = step * len(inputs)
        if samples_before_batch > EPOCH_SIZE:
            break

        inputs = inputs.to(target_device)
        labels = labels.to(target_device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(inputs), labels)
        loss.backward()
        optimizer.step()
|
|
|
|
|
|
def test(model, data_loader):
    """Return the classification accuracy of ``model`` on ``data_loader``.

    The model is put in eval mode and evaluated without gradient tracking.
    Evaluation stops early once ``batch_idx * len(batch)`` exceeds
    ``TEST_SIZE`` so the example runs quickly.

    Args:
        model: Module whose output has one score per class along dim 1.
        data_loader: Iterable yielding ``(data, target)`` batches.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0``
        when no samples were evaluated (e.g. an empty loader) instead of
        raising ``ZeroDivisionError``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(data_loader):
            # We set this just for the example to run quickly.
            if batch_idx * len(data) > TEST_SIZE:
                break
            data, target = data.to(device), target.to(device)
            # Predicted class = index of the highest score per sample.
            predicted = model(data).argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    if total == 0:
        return 0.0
    return correct / total
|
|
# __train_def_end__
|
|
|
|
|
|
# __train_func_begin__
|
|
def train_mnist(config):
    """Tune function trainable: train ``ConvNet`` on MNIST and report accuracy.

    Args:
        config: Hyperparameter dict supplied by Tune. Reads ``config["lr"]``
            and ``config["momentum"]`` for the SGD optimizer.

    Runs 10 iterations of ``train`` + ``test``, reports ``mean_accuracy`` to
    Tune after each one, and writes ``./model.pth`` on iterations 0 and 5.
    """
    # Data Setup
    mnist_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])

    train_loader = DataLoader(
        datasets.MNIST("~/data", train=True, download=True, transform=mnist_transforms),
        batch_size=64,
        shuffle=True)
    test_loader = DataLoader(
        datasets.MNIST("~/data", train=False, transform=mnist_transforms),
        batch_size=64,
        shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = ConvNet()
    model.to(device)

    optimizer = optim.SGD(
        model.parameters(), lr=config["lr"], momentum=config["momentum"])
    for i in range(10):
        train(model, optimizer, train_loader)
        acc = test(model, test_loader)

        if i % 5 == 0:
            # This saves the model to the trial directory.
            # The checkpoint is written *before* reporting: a scheduler may
            # stop the trial at a report call, in which case code placed
            # after `tune.report` might never run and no `model.pth` would
            # exist for that trial.
            # NOTE(review): confirm against the Ray version in use.
            torch.save(model.state_dict(), "./model.pth")

        # Send the current training result back to Tune
        tune.report(mean_accuracy=acc)
|
|
# __train_func_end__
|
|
# yapf: enable
|
|
|
|
# __eval_func_begin__
|
|
# Search space handed to Tune: `lr` is drawn as 10**(-10 * u) with u sampled
# by np.random.rand(), `momentum` uniformly between 0.1 and 0.9.
search_space = {
    "lr": tune.sample_from(
        lambda spec: 10**(-10 * np.random.rand())),
    "momentum": tune.uniform(0.1, 0.9),
}

# Uncomment this to enable distributed execution
# `ray.init(address="auto")`

# Fetch the dataset once up front, before any trial starts.
datasets.MNIST(root="~/data", train=True, download=True)

analysis = tune.run(
    train_mnist,
    config=search_space,
)
|
|
# __eval_func_end__
|
|
|
|
#__plot_begin__
|
|
# Per-trial result dataframes of the `tune.run` call above.
dfs = analysis.trial_dataframes
# Plot each trial's accuracy curve. A plain loop is used because the calls
# are made only for their plotting side effect; no list of results is needed.
for trial_df in dfs.values():
    trial_df.mean_accuracy.plot()
|
|
#__plot_end__
|
|
|
|
# __run_scheduler_begin__
|
|
# Scheduler that ranks trials by `mean_accuracy` (higher is better).
asha_scheduler = ASHAScheduler(metric="mean_accuracy", mode="max")

analysis = tune.run(
    train_mnist,
    config=search_space,
    num_samples=20,
    scheduler=asha_scheduler,
)

# Obtain a trial dataframe from all run trials of this `tune.run` call.
dfs = analysis.trial_dataframes
|
|
# __run_scheduler_end__
|
|
|
|
# yapf: disable
|
|
# __plot_scheduler_begin__
|
|
# Plot by epoch
|
|
# Start without an axes object; each call returns the axes it drew on, and
# passing it back in keeps every trial on the same plot.
ax = None
for d in dfs.values():
    accuracy_curve = d["mean_accuracy"]
    ax = accuracy_curve.plot(ax=ax, legend=False)
|
|
# __plot_scheduler_end__
|
|
# yapf: enable
|
|
|
|
# __run_searchalg_begin__
|
|
from hyperopt import hp
from ray.tune.suggest.hyperopt import HyperOptSearch

# HyperOpt search space.
# `hp.loguniform(label, low, high)` draws exp(uniform(low, high)), i.e. its
# bounds are given in log space. Passing the raw values (1e-10, 0.1) would
# sample lr from roughly [1.0, 1.105]; take the log of the intended bounds
# so lr is log-uniform over [1e-10, 0.1].
space = {
    "lr": hp.loguniform("lr", np.log(1e-10), np.log(0.1)),
    "momentum": hp.uniform("momentum", 0.1, 0.9),
}

hyperopt_search = HyperOptSearch(space, metric="mean_accuracy", mode="max")

analysis = tune.run(train_mnist, num_samples=10, search_alg=hyperopt_search)
|
|
|
|
# To enable GPUs, use this instead:
|
|
# analysis = tune.run(
|
|
# train_mnist, config=search_space, resources_per_trial={'gpu': 1})
|
|
|
|
# __run_searchalg_end__
|
|
|
|
# __run_analysis_begin__
|
|
import os

# Results of all trials from the most recent `tune.run` call.
df = analysis.results_df
# Log directory of the trial with the highest `mean_accuracy`.
logdir = analysis.get_best_logdir("mean_accuracy", mode="max")
# `ConvNet()` below is created on the CPU, so load the checkpoint onto the
# CPU as well; without `map_location` a checkpoint written from a CUDA model
# cannot be loaded in a process that has no GPU available.
state_dict = torch.load(
    os.path.join(logdir, "model.pth"), map_location="cpu")

model = ConvNet()
model.load_state_dict(state_dict)
|
|
# __run_analysis_end__
|
|
|
|
from ray.tune.examples.mnist_pytorch_trainable import TrainMNIST
|
|
|
|
# __trainable_run_begin__
|
|
# Same search space as for the function-based trainable: `lr` is drawn as
# 10**(-10 * u) with u from np.random.rand(), `momentum` uniformly between
# 0.1 and 0.9.
search_space = {
    "lr": tune.sample_from(
        lambda spec: 10**(-10 * np.random.rand())),
    "momentum": tune.uniform(0.1, 0.9),
}

# Run the class-based trainable; every trial stops after 10 iterations.
stop_criteria = {"training_iteration": 10}
analysis = tune.run(
    TrainMNIST,
    config=search_space,
    stop=stop_criteria,
)
|
|
# __trainable_run_end__
|