This commit is contained in:
wassname
2020-04-11 15:37:55 +08:00
parent 4957e04bdc
commit 0a393b4e02
11 changed files with 606 additions and 329 deletions
+61 -41
View File
@@ -8,7 +8,11 @@ from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from test_tube import Experiment, HyperOptArgumentParser
from neural_processes.data.smart_meter import collate_fns, SmartMeterDataSet, get_smartmeter_df
from neural_processes.data.smart_meter import (
collate_fns,
SmartMeterDataSet,
get_smartmeter_df,
)
import torchvision.transforms as transforms
from neural_processes.plot import plot_from_loader_to_tensor, plot_from_loader
from argparse import ArgumentParser
@@ -20,20 +24,23 @@ import torch
import io
import PIL
from torchvision.transforms import ToTensor
from neural_processes.modules import BatchNormSequence
from neural_processes.data.smart_meter import get_smartmeter_df
from neural_processes.utils import ObjectDict
from neural_processes.lightning import PL_Seq2Seq
from ..logger import logger
from ..utils import hparams_power
class Seq2SeqNet(nn.Module):
def __init__(self, hparams, _min_std = 0.05):
def __init__(self, hparams, _min_std=0.05):
super().__init__()
hparams = hparams_power(hparams)
self.hparams = hparams
self._min_std = _min_std
self.norm_input = BatchNormSequence(self.hparams.input_size)
self.encoder = nn.LSTM(
input_size=self.hparams.input_size,
@@ -43,7 +50,9 @@ class Seq2SeqNet(nn.Module):
bidirectional=self.hparams.bidirectional,
dropout=self.hparams.lstm_dropout,
)
self.multihead_attn = nn.MultiheadAttention(self.hparams.hidden_size, num_heads=8)
self.multihead_attn = nn.MultiheadAttention(
self.hparams.hidden_size, num_heads=8
)
self.norm_target = BatchNormSequence(self.hparams.input_size_decoder)
self.decoder = nn.LSTM(
@@ -54,9 +63,8 @@ class Seq2SeqNet(nn.Module):
bidirectional=self.hparams.bidirectional,
dropout=self.hparams.lstm_dropout,
)
self.hidden_out_size = (
self.hparams.hidden_size
* (self.hparams.bidirectional + 1)
self.hidden_out_size = self.hparams.hidden_size * (
self.hparams.bidirectional + 1
)
self.mean = nn.Linear(self.hidden_out_size, self.hparams.output_size)
self.std = nn.Linear(self.hidden_out_size, self.hparams.output_size)
@@ -76,74 +84,86 @@ class Seq2SeqNet(nn.Module):
# context_x, d_encoded, target_x = k, v, q
# query, key, value = target_x, context_x, d_encoded
attn_output, _ = self.multihead_attn(h_out.permute(1, 0, 2), h_out.permute(1, 0, 2), h_out.permute(1, 0, 2))
attn_output, _ = self.multihead_attn(
h_out.permute(1, 0, 2), h_out.permute(1, 0, 2), h_out.permute(1, 0, 2)
)
h_out = attn_output.permute(1, 0, 2).contiguous()
attn_output, _ = self.multihead_attn(cell.permute(1, 0, 2), cell.permute(1, 0, 2), cell.permute(1, 0, 2))
attn_output, _ = self.multihead_attn(
cell.permute(1, 0, 2), cell.permute(1, 0, 2), cell.permute(1, 0, 2)
)
cell = attn_output.permute(1, 0, 2).contiguous()
outputs, (_, _) = self.decoder(target_x, (h_out, cell))
# output = [batch size, seq len, hid dim * n directions]
# outputs: [B, T, num_direction * H]
mean = self.mean(outputs)
log_sigma = self.std(outputs)
if self._use_lvar:
log_sigma = torch.clamp(log_sigma, math.log(self._min_std), -math.log(self._min_std))
log_sigma = torch.clamp(
log_sigma, math.log(self._min_std), -math.log(self._min_std)
)
sigma = torch.exp(log_sigma)
else:
sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
y_dist=torch.distributions.Normal(mean, sigma)
y_dist = torch.distributions.Normal(mean, sigma)
# Loss
loss_mse = loss_p = None
if target_y is not None:
loss_mse = F.mse_loss(mean, target_y, reduction='none')
loss_mse = F.mse_loss(mean, target_y, reduction="none")
if self._use_lvar:
loss_p = -log_prob_sigma(target_y, mean, log_sigma)
else:
loss_p = -y_dist.log_prob(target_y).mean(-1)
if self.hparams["context_in_target"]:
loss_p[:context_x.size(1)] /= 100
loss_mse[:context_x.size(1)] /= 100
loss_p[: context_x.size(1)] /= 100
loss_mse[: context_x.size(1)] /= 100
# # Don't catch loss on context window
# mean = mean[:, self.hparams.num_context:]
# log_sigma = log_sigma[:, self.hparams.num_context:]
y_pred = y_dist.rsample if self.training else y_dist.loc
return y_pred, dict(loss_p=loss_p.mean(), loss_mse=loss_mse.mean()), dict(log_sigma=log_sigma, dist=y_dist)
return (
y_pred,
dict(loss_p=loss_p.mean(), loss_mse=loss_mse.mean()),
dict(log_sigma=log_sigma, dist=y_dist),
)
class LSTMSeq2Seq_PL(PL_Seq2Seq):
def __init__(self, hparams,
MODEL_CLS=Seq2SeqNet, **kwargs):
super().__init__(hparams,
MODEL_CLS=MODEL_CLS, **kwargs)
def __init__(self, hparams, MODEL_CLS=Seq2SeqNet, **kwargs):
super().__init__(hparams, MODEL_CLS=MODEL_CLS, **kwargs)
DEFAULT_ARGS = {
"agg": "mean",
"lstm_dropout": 0.22,
"hidden_size_power": 4.0,
"learning_rate": 0.001,
"lstm_layers": 4,
'bidirectional': False
}
DEFAULT_ARGS = {'agg': 'mean', 'lstm_dropout': 0.12013231612195126, 'hidden_out_size_power': 4.0, 'hidden_size_power': 7.0, 'learning_rate': 0.0022924639229335475, 'nhead_power': 2.0, 'nlayers_power': 4.0}
@staticmethod
def add_suggest(trial):
# TODO make label name configurable
# TODO make data source configurable
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_uniform("lstm_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])
trial.suggest_categorical("bidirectional", [False, True])
trial.suggest_discrete_uniform("hidden_size_power", 3, 9, 1)
trial.suggest_int("lstm_layers", 1, 8)
trial.suggest_categorical("bidirectional", [False, True])
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_extra_target': 24*4,
'vis_i': '670',
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'output_size': 1
"batch_size": 16,
"grad_clip": 40,
"max_nb_epochs": 200,
"num_workers": 4,
"num_extra_target": 24 * 4,
"vis_i": "670",
"num_context": 24 * 4,
"input_size": 18,
"input_size_decoder": 17,
"context_in_target": False,
"output_size": 1,
}
return trial
+38 -31
View File
@@ -24,15 +24,14 @@ from neural_processes.data.smart_meter import get_smartmeter_df
from neural_processes.utils import ObjectDict
from ..lightning import PL_Seq2Seq
from torch.utils.data._utils.collate import default_collate
def collate_fn(batch, sample=None):
    """Collate ``batch`` into a single batch via ``default_collate``.

    Args:
        batch: The list of samples to merge; passed straight through to
            ``default_collate``.
        sample: Accepted for signature compatibility; this function never
            reads it.

    Returns:
        Whatever ``default_collate(batch)`` returns.
    """
    collated = default_collate(batch)
    return collated
from ..logger import logger
from ..utils import hparams_power
class LSTMNet(nn.Module):
def __init__(self, hparams, _min_std = 0.05):
def __init__(self, hparams, _min_std=0.05):
super().__init__()
hparams = hparams_power(hparams)
self.hparams = hparams
self._min_std = _min_std
@@ -44,9 +43,8 @@ class LSTMNet(nn.Module):
bidirectional=self.hparams.bidirectional,
dropout=self.hparams.lstm_dropout,
)
self.hidden_out_size = (
self.hparams.hidden_size
* (self.hparams.bidirectional + 1)
self.hidden_out_size = self.hparams.hidden_size * (
self.hparams.bidirectional + 1
)
self.mean = nn.Linear(self.hidden_out_size, 1)
self.std = nn.Linear(self.hidden_out_size, 1)
@@ -62,7 +60,12 @@ class LSTMNet(nn.Module):
loss = None
if target_y is not None:
loss = F.mse_loss(y_pred * loss_scale, y[:, -steps:, :] * loss_scale, reduction='none') / loss_scale
loss = (
F.mse_loss(
y_pred * loss_scale, y[:, -steps:, :] * loss_scale, reduction="none"
)
/ loss_scale
)
assert torch.isfinite(loss)
@@ -70,35 +73,39 @@ class LSTMNet(nn.Module):
class LSTM_PL_STD(PL_Seq2Seq):
def __init__(self, hparams,
MODEL_CLS=LSTMNet, **kwargs):
super().__init__(hparams,
MODEL_CLS=MODEL_CLS, **kwargs)
def __init__(self, hparams, MODEL_CLS=LSTMNet, **kwargs):
super().__init__(hparams, MODEL_CLS=MODEL_CLS, **kwargs)
DEFAULT_ARGS = {'bidirectional': False, 'hidden_size_power': 4, 'learning_rate': 0.0010825329363784934, 'lstm_dropout': 0.3905792111699782, 'lstm_layers': 4}
DEFAULT_ARGS = {
"bidirectional": False,
"hidden_size_power": 4,
"learning_rate": 0.001,
"lstm_dropout": 0.39,
"lstm_layers": 4,
"bidirectional": False,
}
@staticmethod
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_uniform("lstm_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])
trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])
trial.suggest_uniform("lstm_dropout", 0, 0.85)
trial.suggest_discrete_uniform("hidden_size_power", 3, 9, 1)
trial.suggest_int("lstm_layers", 1, 8)
trial.suggest_categorical("bidirectional", [False, True])
# constants
trial._user_attrs = {
'batch_size': 16,
'grad_clip': 40,
'max_nb_epochs': 200,
'num_workers': 4,
'num_extra_target': 24*4,
'vis_i': '670',
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'output_size': 1,
'patience': 3,
"batch_size": 16,
"grad_clip": 40,
"max_nb_epochs": 200,
"num_workers": 4,
"num_extra_target": 24 * 4,
"vis_i": "670",
"num_context": 24 * 4,
"input_size": 18,
"input_size_decoder": 17,
"context_in_target": False,
"output_size": 1,
"patience": 3,
}
return trial
@@ -21,14 +21,14 @@ class PL_NeuralProcess(PL_Seq2Seq):
'det_enc_cross_attn_type': 'multihead',
'det_enc_self_attn_type': 'uniform',
'dropout': 0,
'hidden_dim': 128,
'latent_dim': 128,
'hidden_dim_power': 7,
'latent_dim_power': 7,
'latent_enc_self_attn_type': 'uniform',
'learning_rate': 0.002,
'n_decoder_layers': 4,
'n_det_encoder_layers': 4,
'n_latent_encoder_layers': 2,
'num_heads': 8,
'n_latent_encoder_layers_power': 1,
'num_heads_power': 3,
'use_deterministic_path': True,
'use_lvar': True,
'use_self_attn': True,
@@ -37,16 +37,23 @@ class PL_NeuralProcess(PL_Seq2Seq):
@staticmethod
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(8)])
trial.suggest_categorical("latent_dim", [8*2**i for i in range(8)])
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_int("attention_layers", 1, 4)
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_int("num_heads", 8, 8)
trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
trial.suggest_discrete_uniform(
"hidden_dim_power", 3, 11, 1
)
trial.suggest_discrete_uniform(
"latent_dim_power", 3, 11, 1
)
trial.suggest_int(
"n_latent_encoder_layers", 1, 11
)
trial.suggest_int("n_latent_encoder_layers", 1, 12)
trial.suggest_int("n_det_encoder_layers", 1, 12)
trial.suggest_int("n_decoder_layers", 1, 12)
trial.suggest_uniform("dropout", 0, 0.9)
trial.suggest_uniform("attention_dropout", 0, 0.9)
@@ -72,7 +79,7 @@ class PL_NeuralProcess(PL_Seq2Seq):
'vis_i': '670',
'num_extra_target': 24*4,
'x_dim': 18,
'context_in_target': True,
'context_in_target': False,
'y_dim': 1,
'patience': 3,
'min_std': 0.005,
@@ -6,7 +6,7 @@ import math
from neural_processes.modules import BatchNormSequence, BatchMLP, Attention, LSTMBlock
from neural_processes.utils import kl_loss_var, log_prob_sigma
from neural_processes.utils import hparams_power
class LatentEncoder(nn.Module):
def __init__(
@@ -195,6 +195,7 @@ class NeuralProcess(nn.Module):
@staticmethod
def FROM_HPARAMS(hparams):
hparams = hparams_power(hparams)
return NeuralProcess(**hparams)
def __init__(self,
+9 -6
View File
@@ -9,15 +9,15 @@ from torch.nn import functional as F
from torch.utils.data import DataLoader
from neural_processes.lightning import PL_Seq2Seq
from ..logger import logger
class NetTransformer(nn.Module):
def __init__(self, hparams):
super().__init__()
hparams["nlayers"] = int(2 ** hparams["nlayers_power"])
hparams["hidden_size"] = int(2**hparams["hidden_size_power"])
hparams["hidden_out_size"] = int(2 ** hparams["hidden_out_size_power"])
hparams["nhead"] = int(2 ** hparams["nhead_power"])
logger.debug(f"{type(self)} hparams {hparams}")
for k in hparams.keys():
if k.endswith("_power"):
k_new = k.replace("_power", "")
hparams[k_new] = int(2 ** hparams[k])
self.hparams = hparams
hidden_out_size = self.hparams.hidden_out_size
@@ -148,7 +148,7 @@ class PL_Transformer(PL_Seq2Seq):
)
trial.suggest_discrete_uniform("hidden_out_size_power", 2, 9, 1)
trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
trial.suggest_discrete_uniform("nlayers_power", 1, 5, 1)
trial.suggest_int("nlayers_power", 1, 12)
user_attrs_default = {
"batch_size": 16,
@@ -159,6 +159,9 @@ class PL_Transformer(PL_Seq2Seq):
"input_size": 6,
"output_size": 1,
"label_steps": 24,
"nan_value": -99.9,
'context_in_target': False,
'patience': 3,
}
[trial.set_user_attr(k, v) for k, v in user_attrs_default.items()]
[trial.set_user_attr(k, v) for k, v in user_attrs.items()]
+11 -6
View File
@@ -27,10 +27,13 @@ from neural_processes.modules import BatchNormSequence
from neural_processes.utils import ObjectDict
from neural_processes.lightning import PL_Seq2Seq
from ..logger import logger
from ..utils import hparams_power
class TransformerSeq2SeqNet(nn.Module):
def __init__(self, hparams, _min_std = 0.05):
super().__init__()
hparams = hparams_power(hparams)
self.hparams = hparams
self._min_std = _min_std
@@ -134,7 +137,7 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
super().__init__(hparams,
MODEL_CLS=MODEL_CLS, **kwargs)
DEFAULT_ARGS = {'agg': 'mean', 'attention_dropout': 0.12013231612195126, 'hidden_out_size_power': 4.0, 'hidden_size_power': 7.0, 'learning_rate': 0.0022924639229335475, 'nhead_power': 2.0, 'nlayers_power': 4.0}
DEFAULT_ARGS = {'agg': 'mean', 'attention_dropout': 0.12, 'hidden_out_size_power': 4, 'hidden_size_power': 7, 'learning_rate': 0.0023, 'nhead_power': 2, 'nlayers_power': 4}
@staticmethod
def add_suggest(trial: optuna.Trial):
@@ -155,10 +158,12 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
"""
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_uniform("attention_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("hidden_out_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("nlayers", [1, 2, 4, 6, 8, 16, 32])
trial.suggest_categorical("nhead", [1, 2, 8, 16])
trial.suggest_discrete_uniform(
"hidden_size_power", 2, 10, 1
)
trial.suggest_discrete_uniform("hidden_out_size_power", 2, 9, 1)
trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
trial.suggest_int("nlayers", 1, 12)
trial._user_attrs = {
'batch_size': 16,
@@ -170,7 +175,7 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
'num_context': 24*4,
'input_size': 18,
'input_size_decoder': 17,
'context_in_target': True,
'context_in_target': False,
'output_size': 1,
'patience': 3,
}
+1 -1
View File
@@ -106,7 +106,7 @@ def run_trial(
# Add user attributes
trial._user_attrs.update(user_attrs)
print('trial', trial)
print('trial', trial, trial.params, trial.user_attrs)
model, trainer = main(
trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
+49 -14
View File
@@ -55,28 +55,63 @@ class PyTorchLightningPruningCallback(EarlyStopping):
raise optuna.exceptions.TrialPruned(message)
# class ObjectDict(dict):
# """
# Interface similar to an argparser
# """
# def __init__(self):
# pass
# def __setattr__(self, attr, value):
# self[attr] = value
# return self[attr]
# def __getattr__(self, attr):
# if attr.startswith("_"):
# # https://stackoverflow.com/questions/10364332/how-to-pickle-python-object-derived-from-dict
# raise AttributeError
# try:
# return super().__getitem__(attr)
# except KeyError:
# # cPickle expects __getattr__ to raise AttributeError, not KeyError.
# raise AttributeError(self._KeyErrorString(name))
# @property
# def __dict__(self):
# return dict(self)
class _MissingKeyError(KeyError, AttributeError):
    """Raised by ``ObjectDict`` when attribute access hits a missing key.

    It derives from both ``KeyError`` (what callers of the previous
    implementation saw) and ``AttributeError`` (what ``hasattr``,
    ``getattr(obj, name, default)``, ``copy`` and ``pickle`` expect).
    """


class ObjectDict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    An easy way to represent (hyper)parameters.
    https://stackoverflow.com/a/50613966/221742

    Example:
        >>> hp = ObjectDict(lr=0.1)
        >>> hp.lr
        0.1
        >>> hp.steps = 3
        >>> hp["steps"]
        3
    """

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails. It must raise an
        # AttributeError subclass: a bare KeyError escapes from ``hasattr`` and
        # from ``copy.deepcopy`` (which probes ``__deepcopy__`` via getattr).
        try:
            return self[name]
        except KeyError:
            raise _MissingKeyError(name) from None

    # Attribute assignment always stores a dictionary item.
    __setattr__ = dict.__setitem__

    def __delattr__(self, name):
        try:
            del self[name]
        except KeyError:
            raise _MissingKeyError(name) from None

    def __getstate__(self):
        # Return a plain-dict snapshot so the pickled state does not refer
        # back to the object being pickled.
        return dict(self)

    def __setstate__(self, state):
        self.update(state)

    def copy(self, **extra_params):
        """Return a shallow ``ObjectDict`` copy, updated with ``extra_params``.

        Keys given in ``extra_params`` override the copied values.
        """
        return ObjectDict(self, **extra_params)
def hparams_power(hparams):
    """Expand every ``*_power`` hyperparameter into its power-of-two value.

    Some values should go up in powers of 2. For each string key that ends in
    ``_power`` (for example ``hidden_size_power``), ``int(2 ** value)`` is
    stored under the same key with that trailing suffix removed
    (``hidden_size``). The ``*_power`` entries themselves are kept.

    Args:
        hparams: A mutable mapping of hyperparameters. It is modified in place.

    Returns:
        The same ``hparams`` object, after the derived keys have been added.
    """
    suffix = "_power"
    # Snapshot the keys first: entries are added to the mapping while looping.
    for key in list(hparams.keys()):
        if not isinstance(key, str) or not key.endswith(suffix):
            continue
        # Strip only the trailing suffix. ``str.replace`` would also remove
        # "_power" from the middle of a name such as "max_power_draw_power".
        base_name = key[: -len(suffix)]
        if not base_name:
            # A key that is exactly "_power" has no base name to write to.
            continue
        hparams[base_name] = int(2 ** hparams[key])
    return hparams
def log_prob_sigma(value, loc, log_scale):
"""A slightly more stable (not confirmed yet) log prob taking in log_var instead of scale.
+4
View File
@@ -1,3 +1,7 @@
# local package
-e .
# external requirements
torch>=1.3.0
tqdm
pandas
+10
View File
@@ -0,0 +1,10 @@
from setuptools import find_packages, setup
setup(
name='neural_processes',
packages=find_packages(),
version='0.1.0',
description='Attentive Neural Processes',
author='wassname',
license='Apachev2',
)
+400 -215
View File
File diff suppressed because one or more lines are too long