mirror of
https://github.com/wassname/DeepTime.git
synced 2026-06-27 20:02:21 +08:00
seq_len and other fixes
This commit is contained in:
@@ -7,7 +7,7 @@ build.variables_dict = {
|
||||
# 'ForecastDataset.features': ['m', 'h', 'd'],
|
||||
'deeptime3.base_learner': ['Ridge', 'None', 'Transformer'],
|
||||
'deeptime3.inr': ['INR', 'INRPlus2'],
|
||||
'deeptime3.encoder': ['inception', ],
|
||||
'deeptime3.encoder': ['inception', 'lstm', 'mlp', 'lstm2', 'transformer', 'transformer2', 'none'],
|
||||
# 'deeptime3.dropout': [0.0, 0.1, 0.3, 0.5,],
|
||||
}
|
||||
|
||||
@@ -28,11 +28,11 @@ train.clip = 10.
|
||||
|
||||
Checkpoint.patience = 7
|
||||
|
||||
deeptime3.layer_size = 32
|
||||
deeptime3.layer_size = 256
|
||||
deeptime3.inr_layers = 5
|
||||
deeptime3.dropout = 0.3
|
||||
deeptime3.dropout = 0.1
|
||||
deeptime3.base_learner = 'Ridge'
|
||||
deeptime3.n_fourier_feats = 2048
|
||||
deeptime3.n_fourier_feats = 4096
|
||||
deeptime3.scales = [0.01, 0.1, 1, 5, 10, 20, 50, 100]
|
||||
|
||||
ForecastDataset.data_path = 'stocks/OXY_2019.csv.gz'
|
||||
@@ -44,4 +44,4 @@ ForecastDataset.time_features = []
|
||||
ForecastDataset.normalise_time_features = True
|
||||
ForecastDataset.features = 'M2S'
|
||||
ForecastDataset.horizon_len = 46
|
||||
ForecastDataset.lookback_mult = 2
|
||||
ForecastDataset.lookback_mult = 3
|
||||
|
||||
@@ -2,6 +2,7 @@ import os
|
||||
from os.path import join
|
||||
import math
|
||||
import logging
|
||||
import json
|
||||
from typing import Callable, Optional, Union, Dict, Tuple
|
||||
|
||||
import gin
|
||||
@@ -31,8 +32,11 @@ class ForecastExperiment(Experiment):
|
||||
val_set, val_loader = get_data(flag='val')
|
||||
test_set, test_loader = get_data(flag='test')
|
||||
|
||||
dim_size=train_set.data_x.shape[1]
|
||||
seq_len = train_set[0][1].shape
|
||||
model = get_model(model_type,
|
||||
dim_size=train_set.data_x.shape[1],
|
||||
dim_size=dim_size,
|
||||
seq_len=seq_len,
|
||||
datetime_feats=train_set.timestamps.shape[-1]).to(default_device())
|
||||
checkpoint = Checkpoint(self.root)
|
||||
|
||||
@@ -43,7 +47,8 @@ class ForecastExperiment(Experiment):
|
||||
val_metrics = validate(model, loader=val_loader, report_metrics=True)
|
||||
test_metrics = validate(model, loader=test_loader, report_metrics=True,
|
||||
save_path=self.root if save_vals else None)
|
||||
np.save(join(self.root, 'metrics.npy'), {'val': val_metrics, 'test': test_metrics})
|
||||
# np.save(join(self.root, 'metrics.npy'), {'val': val_metrics, 'test': test_metrics})
|
||||
# Persist the validation/test metrics as JSON in the experiment root.
# The mode 'w' must be passed to open(), not to join(): join(root, 'metrics.npy', 'w')
# builds the path '<root>/metrics.npy/w', which open() then tries to read.
# The context manager closes the file so the dump is flushed to disk.
# NOTE(review): the file keeps its historical '.npy' name although it now holds JSON — confirm readers expect this.
with open(join(self.root, 'metrics.npy'), 'w') as metrics_file:
    json.dump({'val': val_metrics, 'test': test_metrics}, metrics_file)
|
||||
|
||||
val_metrics = {f'ValMetric/{k}': v for k, v in val_metrics.items()}
|
||||
test_metrics = {f'TestMetric/{k}': v for k, v in test_metrics.items()}
|
||||
|
||||
@@ -72,6 +72,7 @@ TODO:
|
||||
- [x] M2S mode
|
||||
- [ ] add other INRs
|
||||
- [ ] add None as learner
|
||||
- [ ] no encoder?
|
||||
|
||||
```
|
||||
python -m experiments.forecast --config_path=experiments/configs/hp_search/Stocks.gin build_experiment
|
||||
|
||||
+32
-19
@@ -20,28 +20,36 @@ from models.modules.encoders import LSTMEncoder, TransformerEncoder2, Transforme
|
||||
# from models.modules.regressors import RidgeRegressor
|
||||
|
||||
@gin.configurable()
|
||||
def deeptime3(dim_size:int, datetime_feats: int, layer_size: int, inr_layers: int, n_fourier_feats: int, scales: float, dropout: float, base_learner: str, encoder:str, inr: str):
|
||||
return DeepTIMe3(dim_size, datetime_feats, layer_size, inr_layers, n_fourier_feats, scales, dropout, base_learner, encoder, inr)
|
||||
def deeptime3(dim_size:int, datetime_feats: int, layer_size: int, inr_layers: int, n_fourier_feats: int, scales: float, dropout: float, base_learner: str, encoder:str, inr: str, seq_len: int):
|
||||
return DeepTIMe3(dim_size, datetime_feats, layer_size, inr_layers, n_fourier_feats, scales, dropout, base_learner, encoder, inr, seq_len)
|
||||
|
||||
|
||||
class DeepTIMe3(nn.Module):
|
||||
def __init__(self, dim_size: int, datetime_feats: int, layer_size: int, inr_layers: int, n_fourier_feats: int, scales: float, dropout: float=0.3, base_learner:str='Ridge', encoder:str='inception', inr:str='INR'):
|
||||
def __init__(self, dim_size: int, datetime_feats: int, layer_size: int, inr_layers: int, n_fourier_feats: int, scales: float, dropout: float=0.3, base_learner:str='Ridge', encoder:str='inception', inr:str='INR', seq_len: int=46):
|
||||
super().__init__()
|
||||
|
||||
# encode the past
|
||||
encoded_size = layer_size
|
||||
encoder_features = 24
|
||||
encoder_layers = 3
|
||||
if encoder == 'inception':
|
||||
encoded_size = layer_size
|
||||
self.encoder = CausalInceptionTimePlus(
|
||||
c_in=dim_size, c_out=encoded_size,
|
||||
# nf=24, depth=4,
|
||||
nf=17, depth=3,
|
||||
bn=True,
|
||||
dilation=2,
|
||||
ks=[39, 19, 3],
|
||||
coord=True, fc_dropout=dropout,
|
||||
self.encoder = InceptionEncoder(
|
||||
c_in=dim_size, c_out=encoded_size, dilation=6,
|
||||
layer_size=17, layers=encoder_layers, dropout=dropout,
|
||||
)
|
||||
elif encoder == 'lstm':
|
||||
self.encoder = LSTMEncoder()
|
||||
self.encoder = LSTMEncoder(c_in=dim_size, c_out=encoded_size, dropout=dropout, layers=encoder_layers, layer_size=24)
|
||||
elif encoder == 'lstm2':
|
||||
self.encoder = LSTMEncoder2(c_in=dim_size, c_out=encoded_size, dropout=dropout, layers=encoder_layers, layer_size=32, seq_len=seq_len)
|
||||
elif encoder == 'mlp':
|
||||
self.encoder = MLPEncoder(c_in=dim_size, c_out=encoded_size, dropout=dropout, layers=encoder_layers, layer_size=256)
|
||||
elif encoder == 'transformer':
|
||||
self.encoder = TransformerEncoder(c_in=dim_size, c_out=encoded_size, dropout=dropout, layers=encoder_layers, layer_size=256, seq_len=seq_len)
|
||||
elif encoder == 'transformer2':
|
||||
self.encoder = TransformerEncoder2(c_in=dim_size, c_out=encoded_size, dropout=dropout, layers=encoder_layers, layer_size=256, seq_len=seq_len)
|
||||
elif encoder == 'none':
|
||||
self.encoder = None
|
||||
encoded_size = 0
|
||||
else:
|
||||
raise NotADirectoryError(encoder)
|
||||
|
||||
@@ -49,10 +57,10 @@ class DeepTIMe3(nn.Module):
|
||||
coord_size = 1
|
||||
in_feats=datetime_feats+encoded_size+coord_size
|
||||
if inr=='INRPlus2':
|
||||
self.inr = INRPlus2(in_feats=in_feats, layers=inr_layers, layer_size=layer_size,
|
||||
n_fourier_feats=n_fourier_feats, scales=scales, dropout=dropout)
|
||||
self.inr = INRPlus2(in_feats=in_feats, out_feats=layer_size, layers=inr_layers, layer_size=max(17, layer_size//8),
|
||||
n_fourier_feats=n_fourier_feats//4, scales=scales, dropout=dropout)
|
||||
elif inr=="INR":
|
||||
self.inr = INR(in_feats=in_feats, layers=inr_layers, layer_size=layer_size,
|
||||
self.inr = INR(in_feats=in_feats, out_feats=layer_size, layers=inr_layers, layer_size=layer_size,
|
||||
n_fourier_feats=n_fourier_feats, scales=scales, dropout=dropout)
|
||||
else:
|
||||
raise NotImplementedError(inr)
|
||||
@@ -75,15 +83,20 @@ class DeepTIMe3(nn.Module):
|
||||
|
||||
# we summarize the past into a single hidden layer. Then repeat it for each coordinate
|
||||
past_len = time.shape[1]
|
||||
encoded_x = self.encoder(past_x.transpose(2, 1))
|
||||
encoded_x = repeat(encoded_x, "b f -> b t f", t=past_len)
|
||||
if self.encoder is not None:
|
||||
encoded_x = self.encoder(past_x)
|
||||
encoded_x = repeat(encoded_x, "b f -> b t f", t=past_len)
|
||||
|
||||
|
||||
# relative coordinates are the same for each batch, so we make them once and repeat them
|
||||
coords = self.get_coords(past_len).to(time.device) + offset
|
||||
coords = repeat(coords, "1 t 1 -> b t 1", b=time.shape[0])
|
||||
|
||||
# combine and run INR to decode the representation
|
||||
context_input = torch.cat([encoded_x, coords, time], dim=-1)
|
||||
if self.encoder is not None:
|
||||
context_input = torch.cat([encoded_x, coords, time], dim=-1)
|
||||
else:
|
||||
context_input = torch.cat([coords, time], dim=-1)
|
||||
context_repr = self.inr(context_input)
|
||||
return context_repr
|
||||
|
||||
|
||||
+1
-1
@@ -13,7 +13,7 @@ def get_model(model_type: str, **kwargs: Union[int, float]) -> torch.nn.Module:
|
||||
elif model_type=="deeptime2":
|
||||
model = deeptime2(datetime_feats=kwargs['datetime_feats'], dim_size=kwargs['dim_size'])
|
||||
elif model_type=="deeptime3":
|
||||
model = deeptime3(datetime_feats=kwargs['datetime_feats'], dim_size=kwargs['dim_size'])
|
||||
model = deeptime3(datetime_feats=kwargs['datetime_feats'], dim_size=kwargs['dim_size'], seq_len=kwargs['seq_len'])
|
||||
else:
|
||||
raise ValueError(f"Unknown model type {model_type}")
|
||||
return model
|
||||
|
||||
@@ -44,10 +44,10 @@ class LinBnDropSN(nn.Sequential):
|
||||
|
||||
|
||||
class InceptionEncoder(nn.Module):
|
||||
def __init__(self, c_in, c_out, *args, **kwargs):
|
||||
def __init__(self, c_in, c_out, dropout, layers, layer_size, *args, **kwargs):
|
||||
super().__init__()
|
||||
self.net = CausalInceptionTimePlus(
|
||||
c_in=c_in, c_out=c_out, custom_head=custom_head, *args, **kwargs
|
||||
c_in=c_in, c_out=c_out, ks=[39, 19, 3], custom_head=custom_head, coord=True, fc_dropout=dropout, bn=True, depth=layers, nf=layer_size, *args, **kwargs
|
||||
)
|
||||
bn = kwargs.get("bn", True)
|
||||
fc_dropout = kwargs.get("fc_dropout", 0.15)
|
||||
@@ -59,7 +59,7 @@ class InceptionEncoder(nn.Module):
|
||||
)
|
||||
self.head = nn.Sequential(
|
||||
# just to make sure we get a spectral norm final layer (after cat)
|
||||
LinBnDropSN(c_out*2, c_out*2, bn=bn, p=fc_dropout),
|
||||
LinBnDropSN(c_out*2, c_out, bn=bn, p=fc_dropout),
|
||||
)
|
||||
|
||||
def forward(self, x):
|
||||
@@ -258,15 +258,17 @@ class MLPEncoder(nn.Module):
|
||||
super().__init__()
|
||||
self.net = INR(
|
||||
in_feats=c_in,
|
||||
out_feats=layer_size,
|
||||
scales=scales,
|
||||
n_fourier_feats=n_fourier_feats,
|
||||
layers=layers,
|
||||
layer_size=layer_size,
|
||||
)
|
||||
self.head = nn.Linear(layer_size, c_out)
|
||||
|
||||
def forward(self, x):
|
||||
"""
|
||||
Takes in a sequence of shape (batch, sequence, features)
|
||||
and outputs a representation of shape (batch, features)
|
||||
"""
|
||||
return self.net(x)[:, -1]
|
||||
return self.head(self.net(x)[:, -1])
|
||||
|
||||
@@ -31,7 +31,7 @@ class INRLayer(nn.Module):
|
||||
|
||||
|
||||
class INR(nn.Module):
|
||||
def __init__(self, in_feats: int, layers: int, layer_size: int, n_fourier_feats: int, scales: float,
|
||||
def __init__(self, in_feats: int, out_feats:int, layers: int, layer_size: int, n_fourier_feats: int, scales: float,
|
||||
dropout: Optional[float] = 0.1):
|
||||
super().__init__()
|
||||
self.features = nn.Linear(in_feats, layer_size) if n_fourier_feats == 0 \
|
||||
@@ -39,7 +39,7 @@ class INR(nn.Module):
|
||||
in_size = layer_size if n_fourier_feats == 0 \
|
||||
else n_fourier_feats
|
||||
layers = [INRLayer(in_size, layer_size, dropout=dropout)] + \
|
||||
[INRLayer(layer_size, layer_size, dropout=dropout) for _ in range(layers - 1)]
|
||||
[INRLayer(layer_size, out_feats, dropout=dropout) for _ in range(layers - 1)]
|
||||
self.layers = nn.Sequential(*layers)
|
||||
|
||||
def forward(self, x: Tensor) -> Tensor:
|
||||
|
||||
@@ -12,16 +12,16 @@ from torch import Tensor
|
||||
from models.modules.feature_transforms import GaussianFourierFeatureTransform
|
||||
|
||||
from tsai.models.InceptionTimePlus import InceptionTimePlus
|
||||
from .causalinception import CausalInceptionTimePlus, CausalConv1d
|
||||
from .causalinception import CausalInceptionTimePlus, CausalConv1d, Conv
|
||||
|
||||
def custom_head(head_nf, c_out, seq_len):
|
||||
return nn.Sequential(
|
||||
CausalConv1d(head_nf, c_out, 1, bias=False)
|
||||
|
||||
# CausalConv1d(head_nf, c_out, 1, bias=False, norm="Spectral")
|
||||
Conv(head_nf, c_out, 1, bias=False, norm="Spectral"),
|
||||
)
|
||||
|
||||
class INRPlus2(nn.Module):
|
||||
def __init__(self, in_feats: int, layers: int, layer_size: int, n_fourier_feats: int, scales: float,
|
||||
def __init__(self, in_feats: int, out_feats:int ,layers: int, layer_size: int, n_fourier_feats: int, scales: float,
|
||||
dropout: Optional[float] = 0.5, bn=False, *args, **kwargs):
|
||||
super().__init__()
|
||||
self.n_fourier_feats = n_fourier_feats
|
||||
@@ -31,8 +31,8 @@ class INRPlus2(nn.Module):
|
||||
in_size = in_feats if n_fourier_feats == 0 \
|
||||
else n_fourier_feats+in_feats
|
||||
self.layers = CausalInceptionTimePlus(
|
||||
in_size, layer_size, seq_len=None, nf=layer_size, depth=layers,
|
||||
flatten=False, concat_pool=False, fc_dropout=dropout, conv_dropout=0.05, bn=bn, y_range=None, custom_head=custom_head, ks=[139, 19, 3], dilation=2, *args, **kwargs
|
||||
in_size, out_feats, seq_len=None, nf=layer_size, depth=layers,
|
||||
flatten=False, concat_pool=False, fc_dropout=dropout, conv_dropout=dropout/4, bn=bn, y_range=None, custom_head=custom_head, ks=[139, 19, 3], dilation=2, *args, **kwargs
|
||||
)
|
||||
# layers = [INRPlusLayer(in_size, layer_size, dropout=dropout)] + \
|
||||
# [INRPlusLayer(layer_size, layer_size, dropout=dropout) for _ in range(layers - 1)]
|
||||
|
||||
@@ -7,9 +7,16 @@ from models.modules.regressors import RidgeRegressor
|
||||
from models.modules.inr import INR, INRLayer
|
||||
|
||||
class SumHead(nn.Module):
|
||||
def __init__(self, d, c_out=1, ):
|
||||
def __init__(self, d, c_out=1, dropout=0):
|
||||
super().__init__()
|
||||
self.l = nn.Linear(d, c_out) # init a random transform
|
||||
# self.conv = nn.Sequential(
|
||||
# CausalConv1d(head_nf, c_out, 1, bias=False, norm="Spectral"),
|
||||
# )
|
||||
self.l = nn.Sequential(
|
||||
INRLayer(d, d, dropout=dropout),
|
||||
# INRLayer(d, d, dropout=dropout),
|
||||
nn.Linear(d, c_out)
|
||||
) # nn.Linear(d, c_out) # init a random transform
|
||||
|
||||
def forward(self, query, support, support_labels):
|
||||
return self.l(query)
|
||||
@@ -27,7 +34,7 @@ class TransformerHead(nn.Module):
|
||||
INRLayer(c_out, hidden_dim, dropout=0),
|
||||
nn.Linear(hidden_dim, hidden_dim)
|
||||
)
|
||||
self.l = nn.MultiheadAttention(embed_dim=d, num_heads=num_heads, batch_first=True, kdim=d, vdim=hidden_dim, add_bias_kv=True, bias=True)
|
||||
self.l = nn.MultiheadAttention(embed_dim=d, num_heads=num_heads, batch_first=True, kdim=d, vdim=hidden_dim, add_bias_kv=True, bias=True, dropout=0)
|
||||
# after using attention let's decode it
|
||||
self.decoder = nn.Sequential(
|
||||
INRLayer(d, d, dropout=dropout),
|
||||
@@ -40,12 +47,12 @@ class TransformerHead(nn.Module):
|
||||
returns the classification score on the query set.
|
||||
|
||||
Parameters:
|
||||
query: a (tasks_per_batch, n_query, d) Tensor.
|
||||
support: a (tasks_per_batch, n_support, d) Tensor.
|
||||
support_labels: a (tasks_per_batch, n_support) Tensor.
|
||||
n_way: a scalar. Represents the number of classes in a few-shot classification task.
|
||||
n_shot: a scalar. Represents the number of support examples given per class.
|
||||
lambda_reg: a scalar. Represents the strength of L2 regularization.
|
||||
query: a (tasks_per_batch, n_query, d) Tensor.
|
||||
support: a (tasks_per_batch, n_support, d) Tensor.
|
||||
support_labels: a (tasks_per_batch, n_support) Tensor.
|
||||
n_way: a scalar. Represents the number of classes in a few-shot classification task.
|
||||
n_shot: a scalar. Represents the number of support examples given per class.
|
||||
lambda_reg: a scalar. Represents the strength of L2 regularization.
|
||||
Returns: a (tasks_per_batch, n_query, n_way) Tensor.
|
||||
"""
|
||||
# should be (batch, seq, feature)
|
||||
@@ -62,7 +69,7 @@ class RegressionHead(nn.Module):
|
||||
# the regular DeepTime one
|
||||
self.head = RidgeRegressor()
|
||||
elif ("None" in base_learner):
|
||||
self.head = SumHead(d=d)
|
||||
self.head = SumHead(d=d, dropout=dropout)
|
||||
elif ("Transformer" in base_learner):
|
||||
self.head = TransformerHead(d=d, dropout=dropout, num_heads=num_heads)
|
||||
else:
|
||||
|
||||
+411
-325
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user