mirror of
https://github.com/wassname/seq2seq-time.git
synced 2026-06-27 19:16:40 +08:00
tidy
This commit is contained in:
@@ -8,27 +8,35 @@
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
def noop(x):
    """Identity function: hand the argument back untouched.

    Serves as a do-nothing stand-in wherever a callable layer is expected
    but no transformation should be applied.
    """
    return x
|
||||
|
||||
|
||||
def shortcut(c_in, c_out):
|
||||
return nn.Sequential(*[nn.Conv1d(c_in, c_out, kernel_size=1),
|
||||
nn.BatchNorm1d(c_out)])
|
||||
|
||||
class Inception(nn.Module):
|
||||
def __init__(self, c_in, bottleneck=32, ks=40, nb_filters=32):
|
||||
return nn.Sequential(
|
||||
*[nn.Conv1d(c_in, c_out, kernel_size=1), nn.BatchNorm1d(c_out)]
|
||||
)
|
||||
|
||||
|
||||
class InceptionLayer(nn.Module):
|
||||
def __init__(self, c_in, bottleneck=32, kernel_size=40, nb_filters=32):
|
||||
|
||||
super().__init__()
|
||||
self.bottleneck = nn.Conv1d(c_in, bottleneck, 1) if bottleneck and c_in > 1 else noop
|
||||
self.bottleneck = (
|
||||
nn.Conv1d(c_in, bottleneck, 1) if bottleneck and c_in > 1 else noop
|
||||
)
|
||||
mts_feat = bottleneck or c_in
|
||||
conv_layers = []
|
||||
kss = [ks // (2**i) for i in range(3)]
|
||||
kss = [kernel_size // (2 ** i) for i in range(3)]
|
||||
# ensure odd kss until nn.Conv1d with padding='same' is available in pytorch 1.3
|
||||
kss = [ksi if ksi % 2 != 0 else ksi - 1 for ksi in kss]
|
||||
kss = [ksi if ksi % 2 != 0 else ksi - 1 for ksi in kss]
|
||||
for i in range(len(kss)):
|
||||
conv_layers.append(
|
||||
nn.Conv1d(mts_feat, nb_filters, kernel_size=kss[i], padding=kss[i] // 2))
|
||||
nn.Conv1d(mts_feat, nb_filters, kernel_size=kss[i], padding=kss[i] // 2)
|
||||
)
|
||||
self.conv_layers = nn.ModuleList(conv_layers)
|
||||
self.maxpool = nn.MaxPool1d(3, stride=1, padding=1)
|
||||
self.conv = nn.Conv1d(c_in, nb_filters, kernel_size=1)
|
||||
@@ -40,40 +48,52 @@ class Inception(nn.Module):
|
||||
x = self.bottleneck(input_tensor)
|
||||
for i in range(3):
|
||||
out_ = self.conv_layers[i](x)
|
||||
if i == 0: out = out_
|
||||
else: out = torch.cat((out, out_), 1)
|
||||
if i == 0:
|
||||
out = out_
|
||||
else:
|
||||
out = torch.cat((out, out_), 1)
|
||||
mp = self.conv(self.maxpool(input_tensor))
|
||||
inc_out = torch.cat((out, mp), 1)
|
||||
return self.act(self.bn(inc_out))
|
||||
|
||||
|
||||
class InceptionBlock(nn.Module):
|
||||
def __init__(self,c_in,bottleneck=32,ks=40,nb_filters=32,residual=True,depth=6):
|
||||
def __init__(
|
||||
self, c_in, bottleneck=32, kernel_size=40, nb_filters=32, residual=True, num_layers=6
|
||||
):
|
||||
|
||||
super().__init__()
|
||||
|
||||
self.residual = residual
|
||||
self.depth = depth
|
||||
self.num_layers = num_layers
|
||||
|
||||
#inception & residual layers
|
||||
# inception & residual layers
|
||||
inc_mods = []
|
||||
res_layers = []
|
||||
res = 0
|
||||
for d in range(depth):
|
||||
for d in range(num_layers):
|
||||
inc_mods.append(
|
||||
Inception(c_in if d == 0 else nb_filters * 4, bottleneck=bottleneck if d > 0 else 0,ks=ks,
|
||||
nb_filters=nb_filters))
|
||||
InceptionLayer(
|
||||
c_in if d == 0 else nb_filters * 4,
|
||||
bottleneck=bottleneck if d > 0 else 0,
|
||||
kernel_size=kernel_size,
|
||||
nb_filters=nb_filters,
|
||||
)
|
||||
)
|
||||
if self.residual and d % 3 == 2:
|
||||
res_layers.append(shortcut(c_in if res == 0 else nb_filters * 4, nb_filters * 4))
|
||||
res_layers.append(
|
||||
shortcut(c_in if res == 0 else nb_filters * 4, nb_filters * 4)
|
||||
)
|
||||
res += 1
|
||||
else: res_layer = res_layers.append(None)
|
||||
else:
|
||||
res_layer = res_layers.append(None)
|
||||
self.inc_mods = nn.ModuleList(inc_mods)
|
||||
self.res_layers = nn.ModuleList(res_layers)
|
||||
self.act = nn.ReLU()
|
||||
|
||||
|
||||
def forward(self, x):
|
||||
res = x
|
||||
for d, l in enumerate(range(self.depth)):
|
||||
for d, l in enumerate(range(self.num_layers)):
|
||||
x = self.inc_mods[d](x)
|
||||
if self.residual and d % 3 == 2:
|
||||
res = self.res_layers[d](res)
|
||||
@@ -81,18 +101,47 @@ class InceptionBlock(nn.Module):
|
||||
res = x
|
||||
x = self.act(x)
|
||||
return x
|
||||
|
||||
class InceptionTime(nn.Module):
|
||||
def __init__(self,c_in,c_out,bottleneck=32,ks=40,nb_filters=32,residual=True,depth=6):
|
||||
|
||||
|
||||
|
||||
class InceptionTimeSeq(nn.Module):
|
||||
def __init__(
|
||||
self,
|
||||
x_dim,
|
||||
y_dim,
|
||||
hidden_size=32,
|
||||
layers=6,
|
||||
kernel_size=40,
|
||||
bottleneck=16,
|
||||
residual=True
|
||||
):
|
||||
super().__init__()
|
||||
self.block = InceptionBlock(c_in,bottleneck=bottleneck,ks=ks,nb_filters=nb_filters,
|
||||
residual=residual,depth=depth)
|
||||
self.gap = nn.AdaptiveAvgPool1d(1)
|
||||
self.fc = nn.Linear(nb_filters * 4, c_out)
|
||||
self.inc_block = InceptionBlock(
|
||||
x_dim + y_dim,
|
||||
bottleneck=bottleneck,
|
||||
kernel_size=kernel_size,
|
||||
nb_filters=hidden_size,
|
||||
residual=residual,
|
||||
num_layers=layers,
|
||||
)
|
||||
self._min_std = 0.01
|
||||
self.mean = nn.Linear(hidden_size*4, y_dim)
|
||||
self.std = nn.Linear(hidden_size*4, y_dim)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.block(x)
|
||||
x = self.gap(x).squeeze(-1)
|
||||
x = self.fc(x)
|
||||
return x
|
||||
def forward(self, past_x, past_y, future_x, future_y=None):
|
||||
device = next(self.parameters()).device
|
||||
B, S, _ = future_x.shape
|
||||
future_y_fake = past_y[:, -1:, :].repeat(1, S, 1).to(device)
|
||||
context = torch.cat([past_x, past_y], -1)
|
||||
target = torch.cat([future_x, future_y_fake], -1)
|
||||
x = torch.cat([context, target * 1], 1).detach()
|
||||
|
||||
out = self.inc_block(x.permute(0, 2, 1)).permute(0, 2, 1)
|
||||
|
||||
# Seems to help a little, especially with extrapolating out of bounds
|
||||
steps = past_y.shape[1]
|
||||
mean = self.mean(out)[:, steps:, :]
|
||||
log_sigma = self.std(out)[:, steps:, :]
|
||||
|
||||
sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
|
||||
return torch.distributions.Normal(mean, sigma), {}
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
class LSTMSeq(nn.Module):
    """LSTM over the observed window that predicts a Normal distribution.

    The past inputs and past targets are concatenated on the last axis and
    run through a ``batch_first`` LSTM.  The hidden states of the final
    ``future_x.shape[1]`` time steps are projected by two linear heads into
    the mean and the (softplus-transformed) standard deviation.

    Args:
        input_size: number of features in ``past_x``.
        output_size: number of features in ``past_y`` (and in the prediction).
        hidden_size: LSTM hidden width.
        lstm_layers: number of stacked LSTM layers.
        lstm_dropout: dropout passed to ``nn.LSTM``.
        _min_std: lower bound added to the predicted standard deviation.
        nan_value: stored on the module; not read by this class itself.
    """

    def __init__(
        self,
        input_size,
        output_size,
        hidden_size=32,
        lstm_layers=2,
        lstm_dropout=0,
        _min_std=0.05,
        nan_value=0,
    ):
        super().__init__()
        self._min_std = _min_std
        self.nan_value = nan_value

        self.lstm = nn.LSTM(
            input_size=input_size + output_size,
            hidden_size=hidden_size,
            batch_first=True,
            num_layers=lstm_layers,
            dropout=lstm_dropout,
        )
        self.mean = nn.Linear(hidden_size, output_size)
        self.std = nn.Linear(hidden_size, output_size)

    def forward(self, past_x, past_y, future_x, future_y=None):
        """Return ``(Normal(mean, sigma), {})`` for the trailing time steps.

        Only ``future_x.shape[1]`` (the number of steps to emit) is read from
        ``future_x``; ``future_y`` is accepted but unused.  If more steps are
        requested than the past window contains, every available step is
        returned.
        """
        x = torch.cat([past_x, past_y], -1).detach()

        steps = future_x.shape[1]
        outputs, _ = self.lstm(x)
        # Use an explicit start index: ``outputs[:, -steps:]`` would return
        # the WHOLE sequence for steps == 0 because ``-0`` is just ``0``.
        start = max(outputs.shape[1] - steps, 0)
        outputs = outputs[:, start:, :]

        # outputs: [B, T, num_direction * H]
        mean = self.mean(outputs)
        log_sigma = self.std(outputs)
        # softplus keeps the scale positive; _min_std keeps it away from zero.
        sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
        y_dist = torch.distributions.Normal(mean, sigma)
        return y_dist, {}
|
||||
@@ -145,7 +145,7 @@ class TemporalConvNet(nn.Module):
|
||||
return out
|
||||
|
||||
|
||||
class TCNSeq2Seq(nn.Module):
|
||||
class TCNSeq(nn.Module):
|
||||
"""
|
||||
See:
|
||||
- https://arxiv.org/pdf/1803.01271.pdf
|
||||
|
||||
@@ -40,12 +40,6 @@ class Transformer(nn.Module):
|
||||
target = torch.cat([future_x, future_y_fake], -1).detach()
|
||||
x = torch.cat([context, target * 1], 1).detach()
|
||||
|
||||
# Masks
|
||||
x_mask = torch.isfinite(x) & (x != self.nan_value)
|
||||
x[~x_mask] = 0
|
||||
x = x.detach()
|
||||
x_key_padding_mask = ~x_mask.any(-1)
|
||||
|
||||
x = self.enc_emb(x).permute(1, 0, 2)
|
||||
|
||||
S, B, _ = x.shape
|
||||
|
||||
@@ -1,73 +0,0 @@
|
||||
from tqdm.auto import tqdm
|
||||
from torch import nn
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
import fast_transformers
|
||||
from fast_transformers.builders import TransformerEncoderBuilder
|
||||
|
||||
class TransformerAutoR(nn.Module):
    """Causal linear-attention encoder over the context plus a placeholder target.

    The model input is the past ``(x, y)`` window followed by the future
    ``x`` paired with a stand-in ``y`` (the last observed ``y`` repeated).
    A ``fast_transformers`` "causal-linear" encoder processes the whole
    sequence and the positions after the past window are mapped to the
    mean and standard deviation of a Normal distribution.
    """

    def __init__(
        self,
        x_dim,
        y_dim,
        hidden_out_size=256,
        nlayers=8,
        n_heads=8,
        use_lstm=False,
        attention_dropout=0,
        dropout=0,
        min_std=0.01,
    ):
        super().__init__()
        self._min_std = min_std
        self.use_lstm = use_lstm

        # Per-head width; the model width is rounded down to a multiple of n_heads.
        head_dim = hidden_out_size // n_heads
        model_dim = head_dim * n_heads
        feature_dim = x_dim + y_dim

        # TODO embed both X's the same
        if use_lstm:
            # NOTE(review): LSTMBlock is not imported in the visible part of
            # this file - confirm where it is supposed to come from.
            self.x_emb = LSTMBlock(feature_dim, feature_dim)

        self.enc_emb = nn.Linear(feature_dim, model_dim)
        builder = fast_transformers.builders.TransformerEncoderBuilder.from_kwargs(
            attention_type="causal-linear",
            n_layers=nlayers,
            n_heads=n_heads,
            feed_forward_dimensions=head_dim * 8 * n_heads,
            query_dimensions=head_dim,
            value_dimensions=head_dim,
            attention_dropout=attention_dropout,
            dropout=dropout,
        )
        self.encoder = builder.get()
        self.mean = nn.Linear(model_dim, y_dim)
        self.std = nn.Linear(model_dim, y_dim)

    def forward(self, past_x, past_y, future_x, future_y=None, mask_context=True, mask_target=True):
        """Return ``(Normal(mean, sigma), {})`` for the positions after the past window.

        ``future_y``, ``mask_context`` and ``mask_target`` are accepted but
        not read by this method.
        """
        device = next(self.parameters()).device
        _, horizon, _ = future_x.shape

        # Stand-in for the unknown future targets: repeat the last observed y.
        placeholder_y = past_y[:, -1:, :].repeat(1, horizon, 1).to(device)
        context = torch.cat([past_x, past_y], -1)
        target = torch.cat([future_x, placeholder_y], -1)
        x = torch.cat([context, target * 1], 1).detach()

        # Optional LSTM pre-embedding: Size([B, T, Y]) -> Size([B, T, Y])
        if self.use_lstm:
            x = self.x_emb(x)

        # Embed
        x = self.enc_emb(x)

        # requires (B, C, hidden_dim)
        n_past = past_y.shape[1]
        seq_len = x.shape[1]
        causal_mask = fast_transformers.masking.TriangularCausalMask(seq_len, device=device)
        encoded = self.encoder(x, attn_mask=causal_mask)
        # Keep only the positions that follow the observed window.
        outputs = encoded[:, n_past:, :]

        # Size([B, T, emb_dim])
        mean = self.mean(outputs)
        log_sigma = self.std(outputs)
        sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
        return torch.distributions.Normal(mean, sigma), {}
|
||||
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from ..util import mask_upper_triangular
|
||||
|
||||
class TransformerSeq(nn.Module):
    """
    A single transformer encoder, masking nan or 0.

    Past inputs and past targets are concatenated on the last axis, embedded
    with a linear layer and passed through ``nn.TransformerEncoder``.  Two
    linear heads map the trailing ``future_x.shape[1]`` positions to the mean
    and standard deviation of a Normal distribution.

    Args:
        x_dim: number of features in ``past_x``.
        y_dim: number of features in ``past_y`` (and in the prediction).
        attention_dropout: passed as ``dropout`` to the encoder layer.
        nhead: number of attention heads.
        nlayers: number of encoder layers.
        hidden_size: embedding / model width.
        nan_value: value treated as "missing" in addition to non-finite values.
        min_std: lower bound added to the predicted standard deviation.
    """

    def __init__(
        self,
        x_dim,
        y_dim,
        attention_dropout=0,
        nhead=8,
        nlayers=2,
        hidden_size=16,
        nan_value=0,
        min_std=0.01,
    ):
        super().__init__()
        self._min_std = min_std
        self.nan_value = nan_value
        enc_x_dim = x_dim + y_dim

        self.enc_emb = nn.Linear(enc_x_dim, hidden_size)
        encoder_norm = nn.LayerNorm(hidden_size)
        layer_enc = nn.TransformerEncoderLayer(
            d_model=hidden_size,
            dim_feedforward=hidden_size * 4,
            dropout=attention_dropout,
            nhead=nhead,
        )
        self.encoder = nn.TransformerEncoder(
            layer_enc, num_layers=nlayers, norm=encoder_norm
        )
        self.mean = nn.Linear(hidden_size, y_dim)
        self.std = nn.Linear(hidden_size, y_dim)

    def forward(self, past_x, past_y, future_x, future_y=None):
        """Return ``(Normal(mean, sigma), {})`` for the trailing time steps.

        Only ``future_x.shape[1]`` (the number of steps to emit) is read from
        ``future_x``; ``future_y`` is accepted but unused.  If more steps are
        requested than the past window contains, every available step is
        returned.
        """
        x = torch.cat([past_x, past_y], -1).detach()

        # Masks: a value counts as present when it is finite and != nan_value.
        x_mask = torch.isfinite(x) & (x != self.nan_value)
        x[~x_mask] = 0
        x = x.detach()
        # A time step is treated as padding when none of its features is present.
        x_key_padding_mask = ~x_mask.any(-1)

        # The encoder is not batch_first, so move the time axis to the front.
        x = self.enc_emb(x).permute(1, 0, 2)

        outputs = self.encoder(x, src_key_padding_mask=x_key_padding_mask).permute(
            1, 0, 2
        )

        steps = future_x.shape[1]
        # Use an explicit start index: ``[:, -steps:]`` would return the WHOLE
        # sequence for steps == 0 because ``-0`` is just ``0``.
        start = max(outputs.shape[1] - steps, 0)
        mean = self.mean(outputs)[:, start:, :]
        log_sigma = self.std(outputs)[:, start:, :]

        # softplus keeps the scale positive; _min_std keeps it away from zero.
        sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
        return torch.distributions.Normal(mean, sigma), {}
|
||||
|
||||
@@ -48,7 +48,7 @@ class CrossAttention(nn.Module):
|
||||
|
||||
x = self.enc_emb(x).permute(1, 0, 2)
|
||||
|
||||
B, S, _ = x.shape
|
||||
S, B, _ = x.shape
|
||||
mask = mask_upper_triangular(S, device)
|
||||
|
||||
outputs = self.encoder(x, mask=mask#, src_key_padding_mask=x_key_padding_mask
|
||||
|
||||
Reference in New Issue
Block a user