test set, inputnorm, lstm before encoder

This commit is contained in:
wassname
2020-03-15 12:23:18 +08:00
parent 38ab6cac23
commit 62c377b05f
13 changed files with 1395 additions and 2392 deletions
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+7 -5
View File
@@ -155,7 +155,7 @@ def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), use_logy=Fals
df['dayofweek'] = time.dt.dayofweek / 7.0
# Drop nan and 0's
df = df[df['energy(kWh/hh)']!=0]
df = df[df['energy(kWh/hh)'] != 0]
df = df.dropna()
if use_logy:
@@ -163,7 +163,9 @@ def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), use_logy=Fals
df = df.sort_values('tstp')
# split data
n_split = -int(len(df)*0.1)
df_train = df[:n_split]
df_test = df[n_split:]
return df_train, df_test
test_split= -int(len(df) * 0.1)
val_split= int(len(df) * 0.15)
df_test = df[:val_split]
df_train = df[val_split:test_split]
df_val = df[test_split:]
return df_train, df_val, df_test
+45 -48
View File
@@ -69,7 +69,7 @@ class LatentModelPL(pl.LightningModule):
# agg and print self.train_logs HACK https://github.com/PyTorchLightning/pytorch-lightning/issues/100
train_logs = self.agg_logs(self.train_logs)
train_logs_str = {k: f"{v.mean()}" for k, v in train_logs.items()}
train_logs_str = {k: f"{v}" for k, v in train_logs.items()}
self.train_logs = []
print(f"step val {self.trainer.global_step}, {tensorboard_logs_str} {train_logs}")
return logs
@@ -95,10 +95,10 @@ class LatentModelPL(pl.LightningModule):
if isinstance(outputs[0][j], dict):
# Take mean of sub dicts
keys = outputs[0][j].keys()
aggs[j] = {k: torch.stack([x[j][k] for x in outputs if k in x[j]]).mean() for k in keys}
aggs[j] = {k: torch.stack([x[j][k] for x in outputs if k in x[j]]).mean().item() for k in keys}
else:
# Take mean of numbers
aggs[j] = torch.stack([x[j] for x in outputs if j in x]).mean()
aggs[j] = torch.stack([x[j] for x in outputs if j in x]).mean().item()
return aggs
# # Log hparams with metric, doesn't work
@@ -117,15 +117,14 @@ class LatentModelPL(pl.LightningModule):
return self.validation_end(*args, **kwargs)
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"], weight_decay=0)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, patience=1, verbose=True, min_lr=1e-7) # note early stopping has patience 3
optim = torch.optim.AdamW(self.parameters(), lr=self.hparams["learning_rate"], weight_decay=0)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, patience=self.hparams["patience"], verbose=True, min_lr=1e-7) # note early stopping has patience 3
return [optim], [scheduler]
def _get_cache_dfs(self):
if self._dfs is None:
df_train, df_test = get_smartmeter_df()
# self._dfs = dict(df_train=df_train[:600], df_test=df_test[:600])
self._dfs = dict(df_train=df_train, df_test=df_test)
df_train, df_val, df_test = get_smartmeter_df()
self._dfs = dict(df_train=df_train, df_val=df_val, df_test=df_test)
return self._dfs
def train_dataloader(self):
@@ -144,7 +143,7 @@ class LatentModelPL(pl.LightningModule):
)
def val_dataloader(self):
df_test = self._get_cache_dfs()['df_test']
df_test = self._get_cache_dfs()['df_val']
data_test = SmartMeterDataSet(
df_test, self.hparams["num_context"], self.hparams["num_extra_target"]
)
@@ -172,49 +171,47 @@ class LatentModelPL(pl.LightningModule):
)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser], add_help=False)
parser.opt_range("--learning_rate", default=1e-3, type=float, tunable=True, high=1e-2, low=1e-5, log_base=10)
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(6)])
trial.suggest_categorical("latent_dim", [8*2**i for i in range(6)])
parser.opt_list("--hidden_dim", default=128, type=int, tunable=True, options=[8*2**i for i in range(8)])
parser.opt_list("--latent_dim", default=128, type=int, tunable=True, options=[8*2**i for i in range(8)])
parser.add_argument("--num_heads", default=8, type=int)
parser.add_argument("--attention_layers", default=1, type=int)
parser.opt_list("--n_latent_encoder_layers", default=4, type=int, tunable=True, options=[1, 2, 4, 8, 16])
parser.opt_list("--n_det_encoder_layers", default=4, type=int, tunable=True, options=[1, 2, 4, 8, 16])
parser.opt_list("--n_decoder_layers", default=2, type=int, tunable=True, options=[1, 2, 4, 8, 16])
trial.suggest_int("attention_layers", 1, 4)
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 8])
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 8])
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 8])
trial.suggest_int("num_heads", 8, 8)
parser.opt_range("--dropout", default=0, type=float, tunable=True, low=0, high=0.75)
parser.opt_range("--attention_dropout", default=0, type=float, tunable=True, low=0, high=0.75)
parser.add_argument("--min_std", default=0.005, type=float)
trial.suggest_uniform("dropout", 0, 0.9)
trial.suggest_uniform("attention_dropout", 0, 0.9)
parser.opt_list(
"--latent_enc_self_attn_type", default="multihead", type=str, tunable=True, options=['uniform', 'dot', 'multihead', 'ptmultihead']
trial.suggest_categorical(
"latent_enc_self_attn_type", ['uniform', 'multihead', 'ptmultihead']
)
parser.opt_list("--det_enc_self_attn_type", default="multihead", type=str, tunable=True, options=['uniform', 'dot', 'multihead', 'ptmultihead'])
parser.opt_list("--det_enc_cross_attn_type", default="multihead", type=str, tunable=True, options=['uniform', 'dot', 'multihead', 'ptmultihead'])
trial.suggest_categorical("det_enc_self_attn_type", ['uniform', 'multihead', 'ptmultihead'])
trial.suggest_categorical("det_enc_cross_attn_type", ['uniform', 'multihead', 'ptmultihead'])
parser.opt_list("--use_lvar", default=False, type=bool, tunable=True, options=[False, True])
parser.opt_list("--use_rnn", default=False, type=bool, tunable=True, options=[False, True])
parser.opt_list("--use_deterministic_path", default=True, tunable=True, type=bool, options=[False, True])
parser.opt_list("--use_self_attn", default=True, tunable=True, type=bool, options=[False, True])
parser.opt_list("--batchnorm", default=True, tunable=True, type=bool, options=[False, True])
# training specific (for this model)
parser.add_argument("--context_in_target", default=True, type=bool)
parser.add_argument("--grad_clip", default=0, type=float)
parser.add_argument("--num_context", type=int, default=24 * 2)
parser.add_argument("--num_extra_target", type=int, default=24)
parser.add_argument("--max_nb_epochs", default=20, type=int)
parser.add_argument("--num_workers", default=4, type=int)
trial.suggest_categorical("batchnorm", [False, True])
trial.suggest_categorical("use_self_attn", [False, True])
trial.suggest_categorical("use_lvar", [False, True])
trial.suggest_categorical("use_deterministic_path", [False, True])
trial.suggest_categorical("use_rnn", [True, False])
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_context': 24* 4,
'vis_i': '670',
'num_extra_target': 24*4,
'x_dim': 18,
'context_in_target': True,
'y_dim': 1,
'patience': 3,
'min_std': 0.005,
}
return trial
parser.add_argument("--batch_size", default=16, type=int)
parser.add_argument("--x_dim", default=16, type=int)
parser.add_argument("--y_dim", default=1, type=int)
parser.add_argument("--vis_i", default=670, type=int)
return parser
+37 -24
View File
@@ -132,7 +132,7 @@ class LSTM_PL(pl.LightningModule):
def validation_end(self, outputs):
# TODO send an image to tensorboard, like in the lighting_anp.py file
if int(self.hparams["vis_i"]) > 0:
loader = self.val_dataloader()[0]
loader = self.val_dataloader()
vis_i = min(int(self.hparams["vis_i"]), len(loader.dataset))
if isinstance(self.hparams["vis_i"], str):
image = plot_from_loader(loader, self, vis_i=vis_i, window_len=self.hparams["window_length"])
@@ -163,15 +163,14 @@ class LSTM_PL(pl.LightningModule):
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim, patience=2, verbose=True, min_lr=1e-5
optim, patience=self.hparams["patience"], verbose=True, min_lr=1e-5
) # note early stopping has patient 3
return [optim], [scheduler]
def _get_cache_dfs(self):
if self._dfs is None:
df_train, df_test = get_smartmeter_df()
# self._dfs = dict(df_train=df_train[:600], df_test=df_test[:600])
self._dfs = dict(df_train=df_train, df_test=df_test)
df_train, df_val, df_test = get_smartmeter_df()
self._dfs = dict(df_train=df_train, df_val=df_val, df_test=df_test)
return self._dfs
@pl.data_loader
@@ -193,7 +192,7 @@ class LSTM_PL(pl.LightningModule):
@pl.data_loader
def val_dataloader(self):
df_test = self._get_cache_dfs()["df_test"]
df_test = self._get_cache_dfs()["df_val"]
dset_test = SequenceDfDataSet(
df_test,
self.hparams,
@@ -216,27 +215,41 @@ class LSTM_PL(pl.LightningModule):
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
@staticmethod
def add_model_specific_args(parent_parser):
def add_suggest(trial: optuna.Trial):
"""
Specify the hyperparams for this LightningModule
Add hyperparam ranges to an optuna trial and typical user attrs.
Usage:
trial = optuna.trial.FixedTrial(
params={
'hidden_size': 128,
}
)
trial = add_suggest(trial)
trainer = pl.Trainer()
model = LSTM_PL(dict(**trial.params, **trial.user_attrs), dataset_train,
dataset_test, cache_base_path, norm)
trainer.fit(model)
"""
# MODEL specific
parser = HyperOptArgumentParser(parents=[parent_parser])
parser.add_argument("--learning_rate", default=0.002, type=float)
parser.add_argument("--batch_size", default=16, type=int)
parser.add_argument("--lstm_dropout", default=0.5, type=float)
parser.add_argument("--hidden_size", default=16, type=int)
parser.add_argument("--input_size", default=8, type=int)
parser.add_argument("--lstm_layers", default=8, type=int)
parser.add_argument("--bidirectional", default=False, type=bool)
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_uniform("lstm_dropout", 0, 0.75)
trial.suggest_categorical(
"hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]
)
trial.suggest_categorical("lstm_layers", [1, 2, 3, 4, 6, 8])
trial.suggest_categorical("bidirectional", [False, True])
# training specific (for this model)
parser.add_argument("--window_length", type=int, default=12)
parser.add_argument("--target_length", type=int, default=2)
parser.add_argument("--max_nb_epochs", default=10, type=int)
parser.add_argument("--num_workers", default=4, type=int)
return parser
trial._user_attrs = {
"batch_size": 16,
"grad_clip": 40,
"max_nb_epochs": 200,
"num_workers": 4,
"vis_i": 670,
"input_size": 6,
"output_size": 1,
"patience": 2,
}
return trial
def plot_from_loader(loader, model, vis_i=670, n=1, window_len=0):
+47 -28
View File
@@ -20,7 +20,7 @@ import torch
import io
import PIL
from torchvision.transforms import ToTensor
from src.models.modules import BatchNormSequence
from src.data.smart_meter import get_smartmeter_df
from src.utils import ObjectDict
@@ -41,6 +41,9 @@ class Seq2SeqNet(nn.Module):
self.hparams = hparams
self._min_std = _min_std
self.norm_input = BatchNormSequence(self.hparams.input_size)
self.encoder = nn.LSTM(
input_size=self.hparams.input_size,
hidden_size=self.hparams.hidden_size,
@@ -49,6 +52,9 @@ class Seq2SeqNet(nn.Module):
bidirectional=self.hparams.bidirectional,
dropout=self.hparams.lstm_dropout,
)
self.multihead_attn = nn.MultiheadAttention(self.hparams.hidden_size, num_heads=8)
self.norm_target = BatchNormSequence(self.hparams.input_size_decoder)
self.decoder = nn.LSTM(
input_size=self.hparams.input_size_decoder,
hidden_size=self.hparams.hidden_size,
@@ -66,9 +72,23 @@ class Seq2SeqNet(nn.Module):
def forward(self, context_x, context_y, target_x, target_y=None):
x = torch.cat([context_x, context_y], -1)
# Sometimes input normalisation can be important, an initial batch norm is a nice way to ensure this
x = self.norm_input(x)
target_x = self.norm_target(target_x)
_, (h_out, cell) = self.encoder(x)
# hidden = [batch size, n layers * n directions, hid dim]
# cell = [batch size, n layers * n directions, hid dim]
# context_x, d_encoded, target_x = k, v, q
# query, key, value = target_x, context_x, d_encoded
attn_output, _ = self.multihead_attn(h_out.permute(1, 0, 2), h_out.permute(1, 0, 2), h_out.permute(1, 0, 2))
h_out = attn_output.permute(1, 0, 2).contiguous()
attn_output, _ = self.multihead_attn(cell.permute(1, 0, 2), cell.permute(1, 0, 2), cell.permute(1, 0, 2))
cell = attn_output.permute(1, 0, 2).contiguous()
outputs, (_, _) = self.decoder(target_x, (h_out, cell))
# output = [batch size, seq len, hid dim * n directions]
@@ -155,7 +175,7 @@ class LSTMSeq2Seq_PL(pl.LightningModule):
def show_image(self):
# https://github.com/PytorchLightning/pytorch-lightning/blob/f8d9f8f/pytorch_lightning/core/lightning.py#L293
loader = self.val_dataloader()[0]
loader = self.val_dataloader()
vis_i = min(int(self.hparams["vis_i"]), len(loader.dataset))
# print('vis_i', vis_i)
if isinstance(self.hparams["vis_i"], str):
@@ -174,15 +194,14 @@ class LSTMSeq2Seq_PL(pl.LightningModule):
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim, patience=2, verbose=True, min_lr=1e-5
optim, patience=self.hparams["patience"], verbose=True, min_lr=1e-5
) # note early stopping has patient 3
return [optim], [scheduler]
def _get_cache_dfs(self):
if self._dfs is None:
df_train, df_test = get_smartmeter_df()
# self._dfs = dict(df_train=df_train[:600], df_test=df_test[:600])
self._dfs = dict(df_train=df_train, df_test=df_test)
df_train, df_val, df_test = get_smartmeter_df()
self._dfs = dict(df_train=df_train, df_val=df_val, df_test=df_test)
return self._dfs
@pl.data_loader
@@ -203,7 +222,7 @@ class LSTMSeq2Seq_PL(pl.LightningModule):
@pl.data_loader
def val_dataloader(self):
df_test = self._get_cache_dfs()['df_test']
df_test = self._get_cache_dfs()['df_val']
data_test = SmartMeterDataSet(
df_test, self.hparams["num_context"], self.hparams["num_extra_target"]
)
@@ -232,25 +251,25 @@ class LSTMSeq2Seq_PL(pl.LightningModule):
)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = HyperOptArgumentParser(parents=[parent_parser])
parser.add_argument("--learning_rate", default=0.002, type=float)
parser.add_argument("--batch_size", default=16, type=int)
parser.add_argument("--lstm_dropout", default=0.5, type=float)
parser.add_argument("--hidden_size", default=16, type=int)
parser.add_argument("--input_size", default=8, type=int)
parser.add_argument("--input_size_decoder", default=8, type=int)
parser.add_argument("--lstm_layers", default=8, type=int)
parser.add_argument("--bidirectional", default=False, type=bool)
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_uniform("lstm_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])
trial.suggest_categorical("bidirectional", [False, True])
# training specific (for this model)
parser.add_argument("--num_context", type=int, default=12)
parser.add_argument("--num_extra_target", type=int, default=2)
parser.add_argument("--max_nb_epochs", default=10, type=int)
parser.add_argument("--num_workers", default=4, type=int)
return parser
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_extra_target': 24*4,
'vis_i': '670',
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'output_size': 1
}
return trial
+28 -27
View File
@@ -165,7 +165,7 @@ class LSTM_PL(pl.LightningModule):
def validation_end(self, outputs):
# TODO send an image to tensroboard, like in the lighting_anp.py file
if int(self.hparams["vis_i"]) > 0:
loader = self.val_dataloader()[0]
loader = self.val_dataloader()
vis_i = min(int(self.hparams["vis_i"]), len(loader.dataset))
if isinstance(self.hparams["vis_i"], str):
image = plot_from_loader(loader, self, vis_i=vis_i)
@@ -196,15 +196,14 @@ class LSTM_PL(pl.LightningModule):
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim, patience=2, verbose=True, min_lr=1e-5
optim, patience=self.hparams["patience"], verbose=True, min_lr=1e-5
) # note early stopping has patient 3
return [optim], [scheduler]
def _get_cache_dfs(self):
if self._dfs is None:
df_train, df_test = get_smartmeter_df()
# self._dfs = dict(df_train=df_train[:600], df_test=df_test[:600])
self._dfs = dict(df_train=df_train, df_test=df_test)
df_train, df_val, df_test = get_smartmeter_df()
self._dfs = dict(df_train=df_train, df_val=df_val, df_test=df_test)
return self._dfs
@pl.data_loader
@@ -226,7 +225,7 @@ class LSTM_PL(pl.LightningModule):
@pl.data_loader
def val_dataloader(self):
df_test = self._get_cache_dfs()["df_test"]
df_test = self._get_cache_dfs()["df_val"]
dset_test = SequenceDfDataSet(
df_test,
self.hparams,
@@ -249,27 +248,29 @@ class LSTM_PL(pl.LightningModule):
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
@staticmethod
def add_model_specific_args(parent_parser):
"""
Specify the hyperparams for this LightningModule
"""
# MODEL specific
parser = HyperOptArgumentParser(parents=[parent_parser])
parser.add_argument("--learning_rate", default=0.002, type=float)
parser.add_argument("--batch_size", default=16, type=int)
parser.add_argument("--lstm_dropout", default=0.5, type=float)
parser.add_argument("--hidden_size", default=16, type=int)
parser.add_argument("--input_size", default=8, type=int)
parser.add_argument("--lstm_layers", default=8, type=int)
parser.add_argument("--bidirectional", default=False, type=bool)
# training specific (for this model)
parser.add_argument("--window_length", type=int, default=12)
parser.add_argument("--target_length", type=int, default=2)
parser.add_argument("--max_nb_epochs", default=10, type=int)
parser.add_argument("--num_workers", default=4, type=int)
return parser
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_uniform("lstm_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])
trial.suggest_categorical("bidirectional", [False, True])
# constants
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_extra_target': 24*4,
'vis_i': '670',
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'output_size': 1,
'patience': 3,
}
return trial
def plot_from_loader(loader, model, vis_i=670, n=1, window_len=0):
+31 -7
View File
@@ -5,7 +5,7 @@ from torch.utils.data import TensorDataset, DataLoader
import math
from src.models.modules import LatentEncoder, DeterministicEncoder, Decoder
from src.models.modules import BatchNormSequence
def log_prob_sigma(value, loc, log_scale):
"""A slightly more stable (not confirmed yet) log prob taking in log_var instead of scale.
@@ -66,18 +66,32 @@ class LatentModel(nn.Module):
self._use_rnn = use_rnn
self.context_in_target = context_in_target
# Sometimes input normalisation can be important, an initial batch norm is a nice way to ensure this
self.norm_x = BatchNormSequence(x_dim)
self.norm_y = BatchNormSequence(y_dim)
if self._use_rnn:
self._lstm = nn.LSTM(
self._lstm_x = nn.LSTM(
input_size=x_dim,
hidden_size=hidden_dim,
num_layers=attention_layers,
dropout=dropout,
batch_first=True
)
self._lstm_y = nn.LSTM(
input_size=y_dim,
hidden_size=hidden_dim,
num_layers=attention_layers,
dropout=dropout,
batch_first=True
)
x_dim = hidden_dim
y_dim2 = hidden_dim
else:
y_dim2 = y_dim
self._latent_encoder = LatentEncoder(
x_dim + y_dim,
x_dim + y_dim2,
hidden_dim=hidden_dim,
latent_dim=latent_dim,
self_attention_type=latent_enc_self_attn_type,
@@ -93,7 +107,7 @@ class LatentModel(nn.Module):
)
self._deterministic_encoder = DeterministicEncoder(
input_dim=x_dim + y_dim,
input_dim=x_dim + y_dim2,
x_dim=x_dim,
hidden_dim=hidden_dim,
self_attention_type=det_enc_self_attn_type,
@@ -126,16 +140,26 @@ class LatentModel(nn.Module):
def forward(self, context_x, context_y, target_x, target_y=None):
# https://stackoverflow.com/a/46772183/221742
target_x = self.norm_x(target_x)
context_x = self.norm_x(context_x)
context_y = self.norm_y(context_y)
if self._use_rnn:
# see https://arxiv.org/abs/1910.09323 where x is substituted with h = RNN(x)
# x need to be provided as [B, T, H]
target_x, _ = self._lstm(target_x)
context_x, _ = self._lstm(context_x)
target_x, _ = self._lstm_x(target_x)
context_x, _ = self._lstm_x(context_x)
context_y, _ = self._lstm_y(context_y)
dist_prior, log_var_prior = self._latent_encoder(context_x, context_y)
if target_y is not None:
dist_post, log_var_post = self._latent_encoder(target_x, target_y)
target_y2 = self.norm_y(target_y)
if self._use_rnn:
target_y2, _ = self._lstm_y(target_y2)
dist_post, log_var_post = self._latent_encoder(target_x, target_y2)
z = dist_post.loc
else:
z = dist_prior.loc
+16
View File
@@ -24,6 +24,22 @@ class LSTMBlock(nn.Module):
return self._lstm(x)[0]
class BatchNormSequence(nn.Module):
    """Batch-normalise the channel axis of a batch-first sequence.

    The input is laid out as (Batch, Sequence, Channels). ``nn.BatchNorm1d``
    normalises dim 1, so the tensor is flipped to (Batch, Channels, Sequence)
    for the norm and flipped back afterwards; the output has the same layout
    as the input.
    """

    def __init__(self, out_channels):
        super().__init__()
        # One set of batch-norm statistics per channel.
        self.norm = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        # (Batch, Sequence, Channels) -> (Batch, Channels, Sequence)
        channels_first = x.permute(0, 2, 1)
        normalised = self.norm(channels_first)
        # Restore the caller's (Batch, Sequence, Channels) layout.
        return normalised.permute(0, 2, 1)
class NPBlockRelu2d(nn.Module):
"""Block for Neural Processes."""
+43 -29
View File
@@ -19,9 +19,11 @@ from matplotlib import pyplot as plt
import torch
import io
import PIL
import optuna
from torchvision.transforms import ToTensor
from src.data.smart_meter import get_smartmeter_df
from src.models.modules import BatchNormSequence
from src.utils import ObjectDict
@@ -41,7 +43,6 @@ class TransformerSeq2SeqNet(nn.Module):
self.hparams = hparams
self._min_std = _min_std
# TODO project to 8*nhead
hidden_out_size = self.hparams.hidden_out_size
self.enc_emb = nn.Linear(self.hparams.input_size, hidden_out_size)
layer_enc = nn.TransformerEncoderLayer(
@@ -92,7 +93,7 @@ class TransformerSeq2SeqNet(nn.Module):
log_sigma = torch.clamp(log_sigma, math.log(self._min_std), -math.log(self._min_std))
sigma = torch.exp(log_sigma)
y_dist=torch.distributions.Normal(mean, sigma)
y_dist = torch.distributions.Normal(mean, sigma)
# Loss
loss_mse = loss_p = None
@@ -188,15 +189,14 @@ class TransformerSeq2Seq_PL(pl.LightningModule):
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim, patience=2, verbose=True, min_lr=1e-5
) # note early stopping has patient 3
optim, patience=self.hparams["patience"], verbose=True, min_lr=1e-7
) # note early stopping has patience 3
return [optim], [scheduler]
def _get_cache_dfs(self):
if self._dfs is None:
df_train, df_test = get_smartmeter_df()
# self._dfs = dict(df_train=df_train[:600], df_test=df_test[:600])
self._dfs = dict(df_train=df_train, df_test=df_test)
df_train, df_val, df_test = get_smartmeter_df()
self._dfs = dict(df_train=df_train, df_val=df_val, df_test=df_test)
return self._dfs
@pl.data_loader
@@ -217,7 +217,7 @@ class TransformerSeq2Seq_PL(pl.LightningModule):
@pl.data_loader
def val_dataloader(self):
df_test = self._get_cache_dfs()['df_test']
df_test = self._get_cache_dfs()['df_val']
data_test = SmartMeterDataSet(
df_test, self.hparams["num_context"], self.hparams["num_extra_target"]
)
@@ -246,27 +246,41 @@ class TransformerSeq2Seq_PL(pl.LightningModule):
)
@staticmethod
def add_model_specific_args(parent_parser):
def add_suggest(trial: optuna.Trial):
"""
Specify the hyperparams for this LightningModule
Add hyperparam ranges to an optuna trial and typical user attrs.
Usage:
trial = optuna.trial.FixedTrial(
params={
'hidden_size': 128,
}
)
trial = add_suggest(trial)
trainer = pl.Trainer()
model = LSTM_PL(dict(**trial.params, **trial.user_attrs), dataset_train,
dataset_test, cache_base_path, norm)
trainer.fit(model)
"""
# MODEL specific
parser = HyperOptArgumentParser(parents=[parent_parser])
parser.add_argument("--learning_rate", default=0.002, type=float)
parser.add_argument("--batch_size", default=16, type=int)
parser.add_argument("--attention_dropout", default=0.5, type=float)
parser.add_argument("--hidden_size", default=16, type=int)
parser.add_argument("--hidden_out_size", default=16, type=int)
parser.add_argument("--input_size", default=8, type=int)
parser.add_argument("--nhead", default=8, type=int)
parser.add_argument("--input_size_decoder", default=8, type=int)
parser.add_argument("--nlayers", default=8, type=int)
# parser.add_argument("--bidirectional", default=False, type=bool)
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_uniform("attention_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("hidden_out_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("nlayers", [1, 2, 4, 8])
trial.suggest_categorical("nhead", [1, 2, 8, 16])
# training specific (for this model)
parser.add_argument("--num_context", type=int, default=12)
parser.add_argument("--num_extra_target", type=int, default=2)
parser.add_argument("--max_nb_epochs", default=10, type=int)
parser.add_argument("--num_workers", default=4, type=int)
return parser
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_extra_target': 24*4,
'vis_i': '670',
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'output_size': 1,
'patience': 3,
}
return trial
+114
View File
@@ -0,0 +1,114 @@
from pytorch_lightning.callbacks import EarlyStopping
from optuna.integration.pytorch_lightning import _check_pytorch_lightning_availability
from pathlib import Path
import optuna
import pytorch_lightning as pl
import torch
from .dict_logger import DictLogger
from .utils import PyTorchLightningPruningCallback
from .plot import plot_from_loader
def main(
    trial: optuna.Trial,
    PL_MODEL_CLS: pl.LightningModule,
    name: str,
    MODEL_DIR: Path = Path("./lightning_logs"),
    train=True,
    prune=True,
    PERCENT_TEST_EXAMPLES=0.5,
):
    """Build the model and trainer for one trial and optionally fit it.

    Args:
        trial: Trial whose ``params`` and ``user_attrs`` are merged into the
            hyperparameter dict, and whose ``number`` names the run version.
        PL_MODEL_CLS: Model class; it is called with the hyperparameter dict.
        name: Experiment name, used as a sub-directory of ``MODEL_DIR``.
        MODEL_DIR: Root directory for logs and checkpoints.
        train: When true, ``trainer.fit(model)`` is called before returning.
        prune: When true the early-stop slot holds the Optuna pruning
            callback; otherwise a plain ``EarlyStopping`` with twice the
            ``patience`` hyperparameter.
        PERCENT_TEST_EXAMPLES: Passed to the trainer as ``val_percent_check``.

    Returns:
        The ``(model, trainer)`` pair.
    """
    # PyTorch Lightning tries to restore parameters from an earlier trial when
    # checkpoint filenames collide, so each trial gets its own directory.
    version_dir = MODEL_DIR / name / f"version_{trial.number}"
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        version_dir / "chk",
        monitor="val_loss",
        mode="min",
    )

    # An in-memory logger is used (rather than only the default TensorBoard
    # event files) so the final metrics can be read back after optimisation.
    logger = DictLogger(MODEL_DIR, name=name, version=trial.number)

    hparams = dict(**trial.params, **trial.user_attrs)
    max_epochs = hparams["max_nb_epochs"]
    gpus = -1 if torch.cuda.is_available() else None

    if prune:
        stopper = PyTorchLightningPruningCallback(trial, monitor="val_loss")
    else:
        stopper = EarlyStopping(
            patience=hparams["patience"] * 2, monitor="val_loss", verbose=True
        )

    trainer = pl.Trainer(
        logger=logger,
        val_percent_check=PERCENT_TEST_EXAMPLES,
        checkpoint_callback=checkpoint_callback,
        max_epochs=max_epochs,
        gpus=gpus,
        early_stop_callback=stopper,
    )
    model = PL_MODEL_CLS(hparams)

    if train:
        trainer.fit(model)

    return model, trainer
def objective(trial, PL_MODEL_CLS, name=None):
    """Optuna objective: train one trial and return its last ``val_loss``.

    Args:
        trial: Optuna trial; the model class adds its search space to it via
            ``PL_MODEL_CLS.add_suggest``.
        PL_MODEL_CLS: Model class providing ``add_suggest`` and accepting the
            hyperparameter dict.
        name: Experiment name forwarded to ``main``. Defaults to the model
            class name.

    Returns:
        The ``"val_loss"`` entry of the last metrics record held by the
        model's logger.
    """
    # see https://github.com/optuna/optuna/blob/cf6f02d/examples/pytorch_lightning_simple.py
    trial = PL_MODEL_CLS.add_suggest(trial)
    print("trial", trial.number, "params", trial.params)

    # `main` requires the model class and an experiment name; the previous
    # call `main(trial)` raised TypeError before any training happened.
    if name is None:
        name = PL_MODEL_CLS.__name__
    model, trainer = main(trial, PL_MODEL_CLS, name=name)

    # also report to tensorboard & print
    # (the bare name `logger` was never defined here; use the model's logger)
    logger = model.logger
    print("logger.metrics", logger.metrics[-1:])
    logger.experiment.add_hparams(trial.params, logger.metrics[-1])
    logger.save()

    return logger.metrics[-1]["val_loss"]
def add_number(trial: "optuna.Trial", model_dir: Path):
    """Assign ``trial.number`` a fresh negative version for a manual run.

    Existing ``version_*`` entries in ``model_dir`` are scanned and the trial
    gets one less than the smallest number found. A sentinel of -1 is always
    included, so the first manual run is numbered -2, the next -3, and so on.

    Entries whose suffix after the last underscore is not an integer
    (e.g. ``version_backup``) are ignored instead of raising ``ValueError``.

    Args:
        trial: Object whose ``number`` attribute is set.
        model_dir: Directory searched for ``version_*`` entries.

    Returns:
        The same ``trial`` object, with ``number`` set.
    """
    versions = [-1]  # sentinel keeps manual run numbers below -1
    for entry in model_dir.glob("version_*"):
        suffix = entry.stem.split("_")[-1]
        try:
            versions.append(int(suffix))
        except ValueError:
            # Not a numbered run directory; it cannot clash with our number.
            continue

    trial.number = min(versions) - 1
    print("trial.number", trial.number)
    return trial
def run_trial(
    name: str,
    PL_MODEL_CLS: pl.LightningModule,
    params: dict = None,
    user_attrs: dict = None,
    MODEL_DIR: Path = Path("./lightning_logs"),
):
    """Run one manual (non-pruned) trial: fit, reload the checkpoint, test, plot.

    Args:
        name: Experiment name; ``MODEL_DIR / name`` is created if missing.
        PL_MODEL_CLS: Model class providing ``add_suggest``.
        params: Fixed hyperparameter values for the ``FixedTrial``.
            Defaults to an empty dict.
        user_attrs: Overrides merged into the trial's user attributes after
            ``add_suggest`` has set them. Defaults to an empty dict.
        MODEL_DIR: Root directory for logs and checkpoints.

    Returns:
        ``(trial, trainer, model)`` where ``model`` is the instance reloaded
        from the checkpoint.
    """
    # Use None defaults rather than shared mutable `{}` defaults.
    params = {} if params is None else params
    user_attrs = {} if user_attrs is None else user_attrs

    print(f"now run `tensorboard --logdir {MODEL_DIR}`")
    (MODEL_DIR / name).mkdir(parents=True, exist_ok=True)

    trial = optuna.trial.FixedTrial(params=params)
    trial = PL_MODEL_CLS.add_suggest(trial)
    trial = add_number(trial, MODEL_DIR / name)
    trial._user_attrs.update(user_attrs)

    # Build without training so that fitting happens explicitly below.
    model, trainer = main(
        trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
    )
    trainer.fit(model)

    # Load checkpoint
    # NOTE(review): the sort is lexicographic on file names; if several
    # checkpoints with multi-digit epoch numbers can coexist, the last entry
    # may not be the newest - confirm against the checkpoint naming scheme.
    checkpoint = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))[-1]
    device = next(model.parameters()).device
    print(f"Loading checkpoint {checkpoint}")
    model = model.load_from_checkpoint(checkpoint).to(device)

    trainer.test(model)

    # Plot one example from each split.
    plot_from_loader(model.val_dataloader(), model, i=670, title='val 670')
    plot_from_loader(model.train_dataloader(), model, i=670, title='train 670')
    plot_from_loader(model.test_dataloader(), model, i=670, title='test 670')
    return trial, trainer, model
+21 -7
View File
@@ -1,5 +1,18 @@
from pytorch_lightning.callbacks import EarlyStopping
from optuna.integration.pytorch_lightning import _check_pytorch_lightning_availability
from pathlib import Path
import numpy as np
import torch
import optuna
def init_random_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random`` and torch.
    """
    # https://pytorch.org/docs/stable/notes/randomness.html
    # Imported locally so this function does not depend on a module-level
    # `import random` being present.
    import random

    # Python's own RNG was previously left unseeded, so anything drawing from
    # `random` stayed non-reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
class PyTorchLightningPruningCallback(EarlyStopping):
"""Optuna PyTorch Lightning callback to prune unpromising trials.
@@ -20,10 +33,10 @@ class PyTorchLightningPruningCallback(EarlyStopping):
how this dictionary is formatted.
"""
def __init__(self, trial, monitor):
def __init__(self, trial, monitor, **kwargs):
# type: (optuna.trial.Trial, str) -> None
super(PyTorchLightningPruningCallback, self).__init__(monitor)
super().__init__(monitor, **kwargs)
_check_pytorch_lightning_availability()
@@ -41,25 +54,26 @@ class PyTorchLightningPruningCallback(EarlyStopping):
message = "Trial was pruned at epoch {}.".format(epoch)
raise optuna.exceptions.TrialPruned(message)
class ObjectDict(dict):
    """
    Dict whose items can also be read and written as attributes, giving an
    interface similar to an argparse namespace.

    Accepts the same constructor arguments as ``dict``; ``ObjectDict()`` still
    creates an empty mapping.
    """

    def __init__(self, *args, **kwargs):
        # Forward to dict so the object can be created with initial contents.
        super().__init__(*args, **kwargs)

    def __setattr__(self, attr, value):
        # Attribute assignment stores an item, not an instance attribute.
        self[attr] = value
        return self[attr]

    def __getattr__(self, attr):
        if attr.startswith("_"):
            # https://stackoverflow.com/questions/10364332/how-to-pickle-python-object-derived-from-dict
            raise AttributeError
        # Look the key up directly instead of copying the whole dict first.
        # NOTE: a missing public name raises KeyError (not AttributeError),
        # so `hasattr` / `getattr(obj, name, default)` do not see it as absent.
        return self[attr]

    @property
    def __dict__(self):
        # A plain-dict snapshot of the items.
        return dict(self)