[tune] a tiny ptl example (#11497)

This commit is contained in:
Richard Liaw
2020-10-22 18:50:34 -07:00
committed by Alex Wu
parent 2d9b7355ba
commit 395ddb093c
7 changed files with 146 additions and 7 deletions
+5 -5
View File
@@ -276,7 +276,7 @@ matrix:
env:
- RLLIB_TESTING=1 RLLIB_QUICK_TRAIN_AND_MISC_TESTS=1
# TODO (sven): Remove this after fixing rllib tests num_cpus.
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- PYTHON=3.6
- TF_VERSION=2.1.0
- TFP_VERSION=0.8
@@ -297,7 +297,7 @@ matrix:
env:
- RLLIB_TESTING=1 RLLIB_EXAMPLE_DIR_TESTS=1
# TODO (sven): Remove this after fixing rllib tests num_cpus.
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- PYTHON=3.6
- TF_VERSION=2.1.0
- TFP_VERSION=0.8
@@ -318,7 +318,7 @@ matrix:
env:
- RLLIB_TESTING=1 RLLIB_TESTS_DIR_TESTS_A_TO_L=1
# TODO (sven): Remove this after fixing rllib tests num_cpus.
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- PYTHON=3.6
- TF_VERSION=2.1.0
- TFP_VERSION=0.8
@@ -336,7 +336,7 @@ matrix:
env:
- RLLIB_TESTING=1 RLLIB_TESTS_DIR_TESTS_M_TO_Z=1
# TODO (sven): Remove this after fixing rllib tests num_cpus.
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- RAY_USE_MULTIPROCESSING_CPU_COUNT=1
- PYTHON=3.6
- TF_VERSION=2.1.0
- TFP_VERSION=0.8
@@ -357,7 +357,7 @@ matrix:
- PYTHON=3.6
- TF_VERSION=2.2.0
- TFP_VERSION=0.8
- TORCH_VERSION=1.5
- TORCH_VERSION=1.6
- PYTHONWARNINGS=ignore
install:
- . ./ci/travis/ci.sh init RAY_CI_TUNE_AFFECTED
+6 -1
View File
@@ -44,10 +44,15 @@ PyTorch Examples
~~~~~~~~~~~~~~~~
- :doc:`/tune/examples/mnist_pytorch`: Converts the PyTorch MNIST example to use Tune with the function-based API. Also shows how to easily convert something relying on argparse to use Tune.
- :doc:`/tune/examples/mnist_pytorch_lightning`: Uses `Pytorch Lightning <https://github.com/PyTorchLightning/pytorch-lightning>`_ to train a MNIST model. This example utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks <tune-integration-pytorch-lightning>`. See also :ref:`this tutorial for a full walkthrough <tune-pytorch-lightning>`.
- :doc:`/tune/examples/mnist_pytorch_trainable`: Converts the PyTorch MNIST example to use Tune with the Trainable API. Also uses the HyperBandScheduler and checkpoints the model at the end.
- :doc:`/tune/examples/ddp_mnist_torch`: An example showing how to use DistributedDataParallel with Ray Tune. This enables both distributed training and distributed hyperparameter tuning.
PyTorch Lightning Examples
~~~~~~~~~~~~~~~~~~~~~~~~~~
- :doc:`/tune/examples/mnist_ptl_mini`: A minimal example of using `PyTorch Lightning <https://github.com/PyTorchLightning/pytorch-lightning>`_ to train an MNIST model. This example utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks <tune-integration-pytorch-lightning>`. See also :ref:`this tutorial for a full walkthrough <tune-pytorch-lightning>`.
- :doc:`/tune/examples/mnist_pytorch_lightning`: A comprehensive example using `PyTorch Lightning <https://github.com/PyTorchLightning/pytorch-lightning>`_ to train an MNIST model. This example showcases how to use various search optimization techniques. It utilizes the Ray Tune-provided :ref:`PyTorch Lightning callbacks <tune-integration-pytorch-lightning>`. See also :ref:`this tutorial for a full walkthrough <tune-pytorch-lightning>`.
XGBoost Example
~~~~~~~~~~~~~~~
@@ -0,0 +1,6 @@
:orphan:
mnist_ptl_mini
~~~~~~~~~~~~~~
.. literalinclude:: /../../python/ray/tune/examples/mnist_ptl_mini.py
+9
View File
@@ -475,6 +475,15 @@ py_test(
args = ["--smoke-test"]
)
# Runs examples/mnist_ptl_mini.py with --smoke-test as a medium-sized test.
py_test(
name = "mnist_ptl_mini",
size = "medium",
srcs = ["examples/mnist_ptl_mini.py"],
deps = [":tune_lib"],
tags = ["exclusive", "example", "pytorch"],
args = ["--smoke-test"]
)
py_test(
name = "mnist_pytorch_trainable",
size = "small",
+117
View File
@@ -0,0 +1,117 @@
import torch
from torch.nn import functional as F
import pytorch_lightning as pl
from pl_bolts.datamodules import MNISTDataModule
import os
from ray.tune.integration.pytorch_lightning import TuneReportCallback
import tempfile
from ray import tune
class LightningMNISTClassifier(pl.LightningModule):
    """Fully connected MNIST classifier whose sizes and LR come from a config.

    The network is three linear layers (784 -> layer_1 -> layer_2 -> 10) with
    ReLU activations and a log-softmax output.

    Args:
        config: Mapping that provides ``"lr"`` (learning rate passed to Adam)
            and ``"layer_1"`` / ``"layer_2"`` (widths of the hidden layers).
        data_dir: Value stored on ``self.data_dir``; falls back to the
            current working directory when falsy.
    """

    def __init__(self, config, data_dir=None):
        super().__init__()
        self.data_dir = data_dir or os.getcwd()
        self.lr = config["lr"]
        layer_1, layer_2 = config["layer_1"], config["layer_2"]

        # mnist images are (1, 28, 28) (channels, height, width); they are
        # flattened to 28 * 28 features before the first layer.
        self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
        self.layer_2 = torch.nn.Linear(layer_1, layer_2)
        self.layer_3 = torch.nn.Linear(layer_2, 10)
        self.accuracy = pl.metrics.Accuracy()

    def forward(self, x):
        """Return log-probabilities over the 10 classes for a batch ``x``."""
        # Flatten everything after the batch dimension; only the batch size
        # is needed, so the remaining dimensions are not unpacked.
        x = x.view(x.size(0), -1)
        x = torch.relu(self.layer_1(x))
        x = torch.relu(self.layer_2(x))
        return torch.log_softmax(self.layer_3(x), dim=1)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters using ``self.lr``."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

    def _loss_and_accuracy(self, batch):
        """Compute the NLL loss and accuracy for one ``(x, y)`` batch."""
        x, y = batch
        logits = self.forward(x)
        loss = F.nll_loss(logits, y)
        acc = self.accuracy(logits, y)
        return loss, acc

    def training_step(self, train_batch, batch_idx):
        """Log the training loss/accuracy for the batch and return the loss."""
        loss, acc = self._loss_and_accuracy(train_batch)
        self.log("ptl/train_loss", loss)
        self.log("ptl/train_accuracy", acc)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Return the batch's validation loss and accuracy as a dict."""
        loss, acc = self._loss_and_accuracy(val_batch)
        return {"val_loss": loss, "val_accuracy": acc}

    def validation_epoch_end(self, outputs):
        """Log the mean of the per-batch validation loss and accuracy.

        ``outputs`` is the list of dicts returned by ``validation_step``. The
        mean is taken over batches, so every batch carries equal weight.
        """
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_acc = torch.stack([x["val_accuracy"] for x in outputs]).mean()
        self.log("ptl/val_loss", avg_loss)
        self.log("ptl/val_accuracy", avg_acc)
def train_mnist_tune(config, data_dir=None, num_epochs=10, num_gpus=0):
    """Fit one ``LightningMNISTClassifier`` and report validation metrics.

    Args:
        config: Hyperparameters; ``"batch_size"`` is read here and the whole
            mapping is forwarded to ``LightningMNISTClassifier``.
        data_dir: Directory handed to both the model and the data module.
        num_epochs: Passed to the trainer as ``max_epochs``.
        num_gpus: Passed to the trainer as ``gpus``.
    """
    classifier = LightningMNISTClassifier(config, data_dir)
    datamodule = MNISTDataModule(
        data_dir=data_dir, num_workers=1, batch_size=config["batch_size"])

    # "loss" / "acc" are paired with the metric names that the classifier
    # logs at the end of each validation epoch.
    reported_metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
    report_callback = TuneReportCallback(
        reported_metrics, on="validation_end")

    trainer = pl.Trainer(
        max_epochs=num_epochs,
        gpus=num_gpus,
        progress_bar_refresh_rate=0,
        callbacks=[report_callback])
    trainer.fit(classifier, datamodule)
def tune_mnist(num_samples=10, num_epochs=10, gpus_per_trial=0):
    """Run a Tune search over layer sizes, learning rate and batch size.

    Args:
        num_samples: Passed to ``tune.run`` as ``num_samples``.
        num_epochs: Forwarded to ``train_mnist_tune`` for every trial.
        gpus_per_trial: GPU count requested per trial and forwarded to
            ``train_mnist_tune`` as ``num_gpus``.
    """
    mnist_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")

    # Download data
    MNISTDataModule(data_dir=mnist_dir).prepare_data()

    search_space = {
        "layer_1": tune.choice([32, 64, 128]),
        "layer_2": tune.choice([64, 128, 256]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32, 64, 128]),
    }

    # Bind the arguments that are identical for every trial.
    bound_trainable = tune.with_parameters(
        train_mnist_tune,
        data_dir=mnist_dir,
        num_epochs=num_epochs,
        num_gpus=gpus_per_trial)

    trial_resources = {"cpu": 1, "gpu": gpus_per_trial}

    tune.run(
        bound_trainable,
        resources_per_trial=trial_resources,
        metric="loss",
        mode="min",
        config=search_space,
        num_samples=num_samples,
        name="tune_mnist")
if __name__ == "__main__":
    import argparse

    # Script entry point: --smoke-test shrinks the run to one sample and one
    # epoch; otherwise ten of each are used. Unknown arguments are ignored.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    cli_args, _unused = cli.parse_known_args()

    budget = 1 if cli_args.smoke_test else 10
    tune_mnist(num_samples=budget, num_epochs=budget, gpus_per_trial=0)
@@ -185,7 +185,8 @@ def train(netD, netG, optimG, optimD, criterion, dataloader, iteration, device,
netD.zero_grad()
real_cpu = data[0].to(device)
b_size = real_cpu.size(0)
label = torch.full((b_size, ), real_label, device=device)
label = torch.full(
(b_size, ), real_label, dtype=torch.float, device=device)
output = netD(real_cpu).view(-1)
errD_real = criterion(output, label)
errD_real.backward()
+1
View File
@@ -18,6 +18,7 @@ nevergrad
optuna
pytest-remotedata>=0.3.1
pytorch-lightning
pytorch-lightning-bolts
scikit-optimize
sigopt
smart_open