This commit is contained in:
wassname
2020-11-02 19:37:20 +08:00
parent e08470a589
commit ef54150f71
10 changed files with 6899 additions and 9162 deletions
+2
View File
@@ -6,6 +6,8 @@ outputs/
*Copy*.ipynb
*Untitled*.ipynb
old/
*.hvz
*.html
# Byte-compiled / optimized / DLL files
__pycache__/
+35 -69
View File
@@ -1,97 +1,63 @@
seq2seq-time
==============================
Using sequence to sequence interfaces for timeseries regression
Using sequence to sequence (and normal) interfaces for multivariate timeseries regression.
Since this is a deep learning approach it's hard to do hyperparameter optimisation for every model, so be aware that these are only indicative number. The most interesting results are which models are robust, and which models fail for certain dataset.
NOTE: This is a work in progress, with out final numbers...
<img src="reports/figures/Seq2Seq for regression.png" />
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>BaselineLast</th>
<th>RANP</th>
<th>LSTM</th>
<th>LSTMSeq2Seq</th>
<th>TransformerSeq2Seq</th>
<th>TransformerProcess</th>
</tr>
</thead>
<tbody>
<tr>
<th>IMOSCurrentsVel</th>
<td>1.63</td>
<td>23.31</td>
<td>19.44</td>
<td>14.52</td>
<td>46.98</td>
<td>7.35</td>
</tr>
<tr>
<th>BejingPM25</th>
<td>1.71</td>
<td>1.48</td>
<td>1.41</td>
<td>1.39</td>
<td>2.86</td>
<td>1.44</td>
</tr>
<tr>
<th>GasSensor</th>
<td>1.88</td>
<td>-2.24</td>
<td>16.40</td>
<td>-1.53</td>
<td>NaN</td>
<td>0.63</td>
</tr>
<tr>
<th>AppliancesEnergyPrediction</th>
<td>1.56</td>
<td>1.31</td>
<td>1.94</td>
<td>1.57</td>
<td>2.33</td>
<td>1.08</td>
</tr>
<tr>
<th>MetroInterstateTraffic</th>
<td>1.76</td>
<td>-0.27</td>
<td>-0.17</td>
<td>-0.25</td>
<td>4.15</td>
<td>-0.27</td>
</tr>
</tbody>
</table>
# Results
NOTE: Draft numbers
- [ ] TODO mean over N runs
- [ ] TODO hyperparameter opt to make sure I'm comparing optimal hidden_size
| | MetroInterstateTraffic | BejingPM25 | GasSensor | AppliancesEnergyPrediction | IMOSCurrentsVel | mean(e-e_baseline) |
|:-------------------|-------------------------:|-------------:|------------:|-----------------------------:|------------------:|---------------------:|
| RANP | -0.43 | 1.35 | -2.31 | 1.6 | 1.39 | -1.1 |
| LSTMSeq2Seq | -0.01 | 1.28 | -0.82 | 1.67 | 0.93 | -0.81 |
| Transformer | -0.28 | 1.88 | -2.28 | 2.52 | 1.75 | -0.7 |
| LSTM | 0.03 | 1.35 | -0.38 | 1.66 | 1.37 | -0.61 |
| TransformerProcess | -0.36 | 1.49 | -0.84 | 1.65 | 2.39 | -0.55 |
| InceptionTimeSeq | -0.23 | 1.5 | -0.8 | 2.41 | 2.95 | -0.25 |
| TransformerSeq2Seq | 0.2 | 1.96 | 0.44 | 1.88 | 2.38 | -0.05 |
| BaselineMean | 1.43 | 1.59 | 1.54 | 1.42 | 1.1 | 0 |
| TCNSeq | -0.27 | 2.22 | -0.6 | 5.27 | 0.96 | 0.1 |
| BaselineLast | 1.75 | 1.59 | 1.87 | 1.57 | 0.93 | 0.12 |
## Datasets
To ensure a robust score we use multiple multivariate regression timeseries.
For more see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb)
For more see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb) or [notebooks/01.0-mc-datasets/index.html](notebooks/01.0-mc-datasets/index.html)
![](reports/figures/data_batches_appliances.png)
30 minute, current speed at Two Rocks 200m Mooring. Has tidal periods as extra features.
Applience energy usage prediction.
![](reports/figures/data_batches_currents.png)
A metal oxide (MOX) gas sensor exposed during 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
30 minute, current speed at Two Rocks 200m Mooring. Has tidal periods as extra features.
![](reports/figures/data_batches_gas.png)
Hourly PM2.5 data of US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
A metal oxide (MOX) gas sensor exposed during 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
![](reports/figures/data_batches_pm25.png)
Hourly PM2.5 data of US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
![](reports/figures/data_batches_traffic.png)
Hourly Minneapolis-St Paul, MN traffic volume for westbound I-94. Includes weather and holiday features from 2012-2018.
![](reports/figures/data_batches_traffice.png)
## Project Organization
------------
File diff suppressed because one or more lines are too long
+65 -71
View File
@@ -14,17 +14,6 @@
# name: seq2seq-time
# ---
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
# %load_ext autoreload
# %autoreload 2
# %aimport -pandas
# %aimport -torch
# %aimport -numpy
# %aimport -matplotlib
# %aimport -dask
# %aimport -tqdm
# %matplotlib inline
# +
import xarray as xr
import pandas as pd
@@ -34,17 +23,11 @@ import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm
from IPython.display import display, HTML
# -
import warnings
warnings.simplefilter('once')
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
# +
import holoviews as hv
from holoviews import opts
from holoviews.operation.datashader import datashade, dynspread
hv.extension('bokeh', inline=True)
hv.extension('bokeh')
from seq2seq_time.visualization.hv_ggplot import ggplot_theme
hv.renderer('bokeh').theme = ggplot_theme
hv.archive.auto()
@@ -59,15 +42,10 @@ hv.archive.auto()
# ## Parameters
# +
# # device = "cuda" if torch.cuda.is_available() else "cpu"
# print(f'using {device}')
window_past = 48*2
window_future = 48
batch_size = 4
datasets_root = Path('../data/processed/')
# -
# ## Plot helpers
@@ -89,12 +67,12 @@ def plot_batch_y(ds, i):
p *= hv.VLine(now).relabel('now').opts(color='red')
return p
def plot_batches_y(dataset, window_past=window_past, window_future=window_future):
def plot_batches_y(dataset, window_past=window_past, window_future=window_future, n = 4):
ds_name = type(dataset).__name__
opts=dict(width=200, height=100, xaxis=None, yaxis=None)
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
window_future=window_future)
n = 4
max_i = min(len(ds_train), len(ds_val), len(ds_test))
ii = list(np.linspace(0, max_i-10, n-1).astype(int)) + [-1]
l = hv.Layout()
@@ -110,66 +88,63 @@ def plot_batches_y(dataset, window_past=window_past, window_future=window_future
for dataset in datasets:
d = dataset(datasets_root)
display(HTML(f"<h3>{dataset.__name__}</h3>"))
print(d.__doc__)
print(f'{len(d)} rows at freq{d.df.index.freq.freqstr}')
print('columns_forecast', d.columns_forecast)
print('columns_past', d.columns_past)
print('columns_target', d.columns_target)
print
print('Description:', d.__doc__)
print(f'Stats:\n\t{len(d)} rows at freq: "{d.df.index.freq.freqstr}"')
print('\tcolumns_forecast:', d.columns_forecast)
print('\tcolumns_past:', d.columns_past)
print('\tcolumns_target:', d.columns_target)
with pd.option_context("display.max_rows", 4, "display.max_columns", 20):
display(d.df)
display(plot_batches_y(d, n=2).opts(title=''))
# View train, test, val splits
l = hv.Layout()
for dataset in datasets:
d = dataset(datasets_root)
p = dynspread(
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
cmap='red'))
p *= dynspread(
datashade(hv.Scatter(d.df_val[d.columns_target[0]]),
cmap='green'))
p *= dynspread(
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
cmap='blue'))
p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
display(p)
# View train, test, val splits
# View with x and y col
for dataset in datasets:
ds_name = type(dataset).__name__
d = dataset(datasets_root)
print(d)
display(plot_batches_y(d))
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
window_future=window_future)
display(plot_batch_y(ds_train, 10))
# +
# def plot_batch_x(ds, i):
# """Plot input features"""
# x_past, y_past, x_future, y_future = ds.get_rows(i)
# x = pd.concat([x_past, x_future])
# p = hv.NdOverlay({
# col: hv.Curve(x[col]) for col in x.columns
# }, kdims='column')
# now = y_past.index[-1]
# p *= hv.VLine(now).relabel('now').opts(color='red')
# return p
# def plot_batches_x(d):
# """Plot input features for multiple batch"""
# ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
# window_future=window_future)
# l = plot_batch_x(ds_train, 10) + plot_batch_x(ds_val, 10) + plot_batch_x(ds_test, 10)
# l = l.cols(1).opts(shared_axes=False, title=f'{type(d).__name__}')
# return l
# # View train, test, val splits
# for dataset in datasets:
# ds_name = type(dataset).__name__
# d = dataset(datasets_root)
# print(d)
# display(plot_batches_y(d))
# +
# ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
# window_future=window_future)
def plot_batch_x(ds, i):
"""Plot input features"""
x_past, y_past, x_future, y_future = ds.get_rows(i)
x = pd.concat([x_past, x_future])
p = hv.NdOverlay({
col: hv.Curve(x[col]) for col in x.columns
}, kdims='column')
now = y_past.index[-1]
p *= hv.VLine(now).relabel('now').opts(color='red')
return p
def plot_batches_x(d):
"""Plot input features for multiple batch"""
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
window_future=window_future)
l = plot_batch_x(ds_train, 10) + plot_batch_x(ds_val, 10) + plot_batch_x(ds_test, 10)
l = l.cols(1).opts(shared_axes=False, title=f'{type(d).__name__}')
return l
# -
# +
# # View input columns
@@ -181,6 +156,25 @@ for dataset in datasets:
hv.archive.export()
hv.archive.last_export_status()
# +
hv.archive.auto(enabled=False) # We can't capture dynamic plots
# View train, test, val splits
for dataset in datasets:
d = dataset(datasets_root)
p = hv.Layout()
p += dynspread(
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
cmap='red'))
p *= dynspread(
datashade(hv.Scatter(d.df_val[d.columns_target[0]]),
cmap='green'))
p *= dynspread(
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
cmap='blue'))
p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
display(p)
# -
File diff suppressed because one or more lines are too long
-600
View File
@@ -1,600 +0,0 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:light
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.6.0
# kernelspec:
# display_name: seq2seq-time
# language: python
# name: seq2seq-time
# ---
# # Sequence to Sequence Models for Timeseries Regression
#
#
# In this notebook we are going to tackle a harder problem:
# - predicting the future on a timeseries
# - by outputing sequence of predictions
# - with rough uncertainty (uncalibrated)
# - using forecasted information (like weather report, week, or cycle of the moon)
#
# Not many papers benchmark movels for multivariate regression, much less seq prediction with uncertainty. So this notebook will try a range of models on a range of dataset.
#
# We do this using a sequence to sqequence interface
#
# <img src="../reports/figures/Seq2Seq for regression.png" />
#
# - [ ] tensorboard / wandb
# - [ ] show test train
# - [ ] val
# - [ ] don't overfit
# - [ ] TCN
# - [ ] make overlap between past and future
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
# %load_ext autoreload
# %autoreload 2
# %aimport -pandas
# %aimport -torch
# %aimport -numpy
# %aimport -matplotlib
# %aimport -dask
# %aimport -tqdm
# %matplotlib inline
import warnings
warnings.simplefilter('once')
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
# +
# Imports
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
import torch
import torch.utils.data
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm
import pytorch_lightning as pl
# +
import holoviews as hv
from holoviews import opts
from holoviews.operation.datashader import datashade, dynspread
hv.extension('bokeh', inline=True)
from seq2seq_time.visualization.hv_ggplot import ggplot_theme
hv.renderer('bokeh').theme = ggplot_theme
# holoview datashader timeseries options
# %opts RGB [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
# %opts Curve [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
# %opts Scatter [width=800 height=200 show_grid=True active_tools=["xwheel_zoom"] default_tools=["xpan","xwheel_zoom", "reset", "hover"] toolbar="right"]
# %opts Layout [width=800 height=200]
# -
from seq2seq_time.data.dataset import Seq2SeqDataSet, Seq2SeqDataSets
from seq2seq_time.predict import predict, predict_multi
from seq2seq_time.util import dset_to_nc
# ## Parameters
# +
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'using {device}')
window_past = 48*2
window_future = 48
batch_size = 16
num_workers = 4
datasets_root = Path('../data/processed/')
window_past
# -
# ## Plot helpers
# +
def hv_plot_std(d: xr.Dataset):
xf = d.t_target
yp = d.y_pred
s = d.y_pred_std
return hv.Spread((xf, yp, s * 2),
label='2*std').opts(alpha=0.5, line_width=0)
def hv_plot_pred(d: xr.Dataset):
# Get arrays
xf = d.t_target
yp = d.y_pred
s = d.y_pred_std
return hv.Curve({'x': xf, 'y': yp})
def hv_plot_true(d: xr.Dataset):
"""Plot a prediction into the future, at a single point in time."""
# Plot true
x = np.concatenate([d.t_past, d.t_target])
yt = np.concatenate([d.y_past, d.y_true])
p = hv.Scatter({
'x': x,
'y': yt
}, label='true').opts(color='black')
now=pd.Timestamp(d.t_source.squeeze().values)
p = p.opts(
ylabel=str(ds_preds.attrs['targets']),
xlabel=f'{now}'
)
# plot a red line for now
p *= hv.VLine(now, label='now').opts(color='red', framewise=True)
return p
def hv_plot_prediction(d):
p = hv_plot_true(d)
p *= hv_plot_pred(d)
p *= hv_plot_std(d)
return p
# +
def plot_performance(ds_preds, full=False):
"""Multiple plots using xr_preds"""
p = hv_plot_prediction(ds_preds.isel(t_source=10))
display(p)
n = len(ds_preds.t_source)
d_ahead = ds_preds.mean(['t_source'])['nll'].groupby('t_ahead_hours').mean()
nll_vs_tahead = (hv.Curve(
(d_ahead.t_ahead_hours,
d_ahead)).redim(x='hours ahead',
y='nll').opts(
title=f'NLL vs time ahead (no. samples={n})'))
display(nll_vs_tahead)
# Make a plot of the NLL over time. Does this solution get worse with time?
if full:
d_source = ds_preds.mean(['t_ahead'])['nll'].groupby('t_source').mean()
nll_vs_time = (hv.Curve(d_source).opts(
title='Error vs time of prediction'))
display(nll_vs_time)
# A scatter plot is easy with xarray
if full:
tlim = (ds_preds.y_true.min().item(), ds_preds.y_true.max().item())
true_vs_pred = datashade(hv.Scatter(
(ds_preds.y_true,
ds_preds.y_pred))).redim(x='true', y='pred').opts(width=400,
height=400,
xlim=tlim,
ylim=tlim,
title='Scatter plot')
true_vs_pred = dynspread(true_vs_pred)
true_vs_pred
display(true_vs_pred)
def plot_hist(trainer):
try:
df_hist = pd.read_csv(trainer.logger.experiment.metrics_file_path)
df_hist['epoch'] = df_hist['epoch'].ffill()
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
if len(df_histe)>1:
p = hv.Curve(df_histe, kdims=['epoch'], vdims=['loss/train']).relabel('train')
p *= hv.Curve(df_histe, kdims=['epoch'], vdims=['loss/val']).relabel('val')
display(p.opts(ylabel='loss'))
return df_histe
except Exception as e:
print(e)
pass
# +
def df_bold_min(data):
'''
highlight the maximum in a Series or DataFrame
Usage:
`df.style.apply(df_bold_min)`
'''
attr = 'font-weight: bold'
#remove % and cast to float
data = data.replace('%','', regex=True).astype(float)
if data.ndim == 1: # Series from .apply(axis=0) or axis=1
is_min = data == data.min()
return [attr if v else '' for v in is_min]
else: # from .apply(axis=None)
is_min = data == data.min().min()
return pd.DataFrame(np.where(is_min, attr, ''),
index=data.index, columns=data.columns)
def format_results(results, metric=None):
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()}).T
if metric:
return df_results.xs(metric, axis=1, level=1).rename_axis(columns=metric)
return df_results
def display_results(results, metric='nll', strformat="{:.2f}"):
df_results = format_results(results, metric=metric)
# display metric
display(df_results
.style.format(strformat)
.apply(df_bold_min)
)
# -
# ## Datasets
# +
from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction, BejingPM25, GasSensor, MetroInterstateTraffic
datasets = [BejingPM25, GasSensor, AppliancesEnergyPrediction, MetroInterstateTraffic, IMOSCurrentsVel]
datasets
# -
# View train, test, val splits
l = hv.Layout()
for dataset in datasets:
d = dataset(datasets_root)
p = dynspread(
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
cmap='red'))
p *= dynspread(
datashade(hv.Scatter(d.df_val[d.columns_target[0]]),
cmap='green'))
p *= dynspread(
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
cmap='blue'))
p = p.opts(title=f"{dataset}")
display(p)
# ## Lightning
# +
import pytorch_lightning as pl
class PL_MODEL(pl.LightningModule):
def __init__(self, model, lr=3e-4, patience=None, weight_decay=0):
super().__init__()
self._model = model
self.lr = lr
self.patience = patience
self.weight_decay = weight_decay
def forward(self, x_past, y_past, x_future, y_future=None):
"""Eval/Predict"""
y_dist, extra = self._model(x_past, y_past, x_future, y_future)
assert torch.isfinite(y_dist.loc).all(), 'output should be finite'
return y_dist, extra
def training_step(self, batch, batch_idx, phase='train'):
x_past, y_past, x_future, y_future = batch
y_dist, extra = self.forward(*batch)
loss = -y_dist.log_prob(y_future).mean()
assert torch.isfinite(loss).all(), 'loss should be finite'
self.log_dict({f'loss/{phase}':loss})
if ('loss' in extra) and (phase=='train'):
# some models have a special loss
loss = extra['loss']
self.log_dict({f'model_loss/{phase}':loss})
return loss
def validation_step(self, batch, batch_idx):
return self.training_step(batch, batch_idx, phase='val')
def configure_optimizers(self):
optim = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim,
patience=self.patience,
verbose=True,
min_lr=1e-7,
) if self.patience else None
return {'optimizer': optim, 'lr_scheduler': scheduler, 'monitor': 'loss/val'}
# -
# # Run
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# # Models
# +
from seq2seq_time.models.lstm_seq2seq import LSTMSeq2Seq
from seq2seq_time.models.lstm_seq import LSTMSeq
from seq2seq_time.models.lstm import LSTM
from seq2seq_time.models.baseline import BaselineLast
from seq2seq_time.models.transformer import Transformer
from seq2seq_time.models.transformer_autor import TransformerAutoR
from seq2seq_time.models.transformer_seq2seq import TransformerSeq2Seq
from seq2seq_time.models.transformer_seq import TransformerSeq
from seq2seq_time.models.neural_process import RANP
from seq2seq_time.models.transformer_process import TransformerProcess
from seq2seq_time.models.tcn import TCNSeq2Seq
# ## Plots
# +
import gc
def free_mem():
gc.collect()
torch.cuda.empty_cache()
gc.collect()
# +
hidden_size = 16
dropout=0.0
layers=8
nhead=2
models = [
lambda xs, ys: BaselineLast(),
# lambda xs, ys: TransformerAutoR(xs,
# ys, hidden_out_size=hidden_size),
lambda xs, ys: RANP(xs,
ys, hidden_dim=hidden_size, dropout=dropout,
latent_dim=hidden_size//4, n_decoder_layers=layers),
# lambda xs, ys: LSTM(xs,
# ys,
# hidden_size=hidden_size,
# lstm_layers=layers,
# lstm_dropout=dropout),
# lambda xs, ys: LSTMSeq2Seq(xs,
# ys,
# hidden_size=hidden_size,
# lstm_layers=layers,
# lstm_dropout=dropout),
# lambda xs, ys: TransformerSeq2Seq(xs,
# ys,
# hidden_size=hidden_size,
# nhead=nhead,
# nlayers=layers,
# attention_dropout=dropout),
lambda xs, ys: Transformer(xs,
ys,
attention_dropout=dropout,
nhead=nhead,
nlayers=layers,
hidden_size=hidden_size),
# lambda xs, ys:TransformerProcess(xs,
# ys, hidden_size=hidden_size,
# latent_dim=hidden_size//4, dropout=dropout,
# nlayers=layers,)
lambda xs, ys:TCNSeq2Seq(xs, ys, hidden_size=hidden_size, nlayers=layers, dropout=dropout)
]
# models
# -
# +
# Summarize each models shape and weights
Dataset = datasets[0]
dataset = Dataset(datasets_root)
ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
window_future=window_future)
dl_val = DataLoader(ds_val, batch_size=batch_size)
x_past, y_past, x_future, y_future = next(iter(dl_val))
xs = x_past.shape[-1]
ys = y_future.shape[-1]
from seq2seq_time.torchsummaryX import summary
sizes=[]
for m_fn in models:
pt_model = m_fn(xs, ys)
model_name = type(pt_model).__name__
with torch.no_grad():
df_summary, df_total = summary(pt_model, x_past, y_past, x_future, y_future, print_summary=False)
sizes.append(df_total.rename(columns={'Totals':model_name}))
df_model_sizes = pd.concat(sizes, 1)
df_model_sizes.style.format(pd.io.formats.format.EngFormatter(use_eng_prefix=True))
# -
# ## Train
from collections import defaultdict
results = defaultdict(dict)
from seq2seq_time.metrics import rmse, smape
# +
for Dataset in datasets:
dataset_name = Dataset.__name__
dataset = Dataset(datasets_root)
ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
window_future=window_future)
# Init data
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
xs = x_past.shape[-1]
ys = y_future.shape[-1]
# Loaders
dl_train = DataLoader(ds_train,
batch_size=batch_size,
shuffle=True,
pin_memory=num_workers == 0,
num_workers=num_workers)
dl_val = DataLoader(ds_val,
shuffle=True,
batch_size=batch_size,
num_workers=num_workers)
for m_fn in models:
try:
free_mem()
pt_model = m_fn(xs, ys)
model_name = type(pt_model).__name__
print(dataset_name, model_name)
# Wrap in lightning
patience = 5
model = PL_MODEL(pt_model,
lr=3e-4, patience=patience,
# weight_decay=4e-5
).to(device)
# Trainer
trainer = pl.Trainer(
gpus=1,
min_epochs=2,
max_epochs=300,
amp_level='O1',
precision=16,
limit_train_batches=1200,
limit_val_batches=250,
logger=CSVLogger("../outputs", name=f'{dataset_name}_{model_name}'),
callbacks=[
EarlyStopping(monitor='loss/val', patience=patience * 2),
],
)
# Train
trainer.fit(model, dl_train, dl_val)
ds_preds = predict(model.to(device),
ds_test,
batch_size * 2,
device=device,
scaler=dataset.output_scaler)
print(dataset_name, model_name)
print(f'mean_NLL {ds_preds.nll.mean().item():2.2f}')
loss = ds_preds.nll.mean().item()
# Performance TODO tensorboard, wandb
print(plot_hist(trainer))
plot_performance(ds_preds)
metrics = dict(
rmse=rmse(ds_preds.y_true, ds_preds.y_pred).item(),
smape=smape(ds_preds.y_true, ds_preds.y_pred).item(),
nll=ds_preds.nll.mean().item()
)
results[dataset_name][model_name] = metrics
display_results(results, 'nll')
dset_to_nc(ds_preds, Path(trainer.logger.experiment.log_dir)/'ds_preds.nc')
model.cpu()
except Exception as e:
logging.exception('failed to run model')
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display(df_results)
# -
# # Leaderboard
print(f'Negative Log-Likelihood (NLL).\nover {window_future} steps')
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display_results(results, 'nll')
# # Plots
# +
# Load saved preds
ds_predss = defaultdict(dict)
for Dataset in datasets:
dataset_name = Dataset.__name__
for m_fn in models:
pt_model = m_fn(xs, ys)
model_name = type(pt_model).__name__
checkpoint_name = f"{dataset_name}_{model_name}"
save_dir = Path(f"../outputs")/checkpoint_name
fs = sorted(save_dir.glob("**/ds_preds.nc"))
if len(fs)>0:
ds_preds = xr.open_dataset(fs[-1])
ds_predss[dataset_name][model_name] = ds_preds
# -
data_i = 100
# Plot mean of predictions
n = hv.Layout()
for dataset in ds_predss.keys():
d = next(iter(ds_predss[dataset].values())).isel(t_source=data_i)
p = hv_plot_true(d)
for model in results[dataset].keys():
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_source=data_i)
p *= hv_plot_pred(d).relabel(label=f"{model}")
n += p.opts(title=dataset, legend_position='top_left')
n.cols(1).opts(shared_axes=False)
dataset='BejingPM25'
n = hv.Layout()
for i, model in enumerate(ds_predss[dataset].keys()):
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_source=data_i)
p = hv_plot_true(d)
p *= hv_plot_pred(d).relabel('pred')
p *= hv_plot_std(d)
n += p.opts(title=f'{dataset} {model}', legend_position='top_left')
n.cols(1)
plot_performance(ds_preds, full=True)
def plot_at_i(data_i):
d = ds_preds.isel(t_source=data_i)
return hv_plot_prediction(d).relabel(label=f"{model}")
dmap = hv.DynamicMap(plot_at_i, kdims=['t_source'])
t = ds_preds.t_source.values
dmap = dmap.redim.values(t_source=range(len(t)))
dmap.opts(framewise=True)
# Plot series of predictions
t_ahead_i=6
d = ds_preds.isel(t_ahead=t_ahead_i)
p = datashade(hv.Scatter({
'x': d.t_target,
'y': d.y_true
}, label='true').opts(color='black'), cmap='black')
p *= datashade(hv.Curve({'x': d.t_target, 'y':d.y_pred}), cmap='blue')
p.opts(title=f'ahead by {d.freq} * {t_ahead_i}')
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -30,12 +30,11 @@
# <img src="../reports/figures/Seq2Seq for regression.png" />
#
# - [ ] tensorboard / wandb
# - [ ] show test train
# - [ ] val
# - [ ] don't overfit
# - [ ] TCN
# - [ ] make overlap between past and future
# - bejing data has a problem
# - Current?
# - [ ] make overlap between past and future?
# - [ ] do n=5 runs
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
# %load_ext autoreload
@@ -86,6 +85,7 @@ from seq2seq_time.data.dataset import Seq2SeqDataSet, Seq2SeqDataSets
from seq2seq_time.predict import predict, predict_multi
from seq2seq_time.util import dset_to_nc
# +
import logging
import warnings
import seq2seq_time.silence
@@ -95,6 +95,10 @@ warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', 'Consider increasing the value of the `num_workers` argument', UserWarning)
warnings.filterwarnings('ignore', 'Your val_dataloader has `shuffle=True`', UserWarning)
from pytorch_lightning import _logger as log
log.setLevel(logging.WARN)
# -
# ## Parameters
# +
@@ -196,13 +200,20 @@ def plot_performance(ds_preds, full=False):
true_vs_pred
display(true_vs_pred)
def read_hist(trainer: pl.Trainer):
metrics_file_path = Path(trainer.logger.experiment[-1].log_dir)/'..'/'metrics.csv'
try:
df_hist = pd.read_csv(metrics_file_path)
df_hist['epoch'] = df_hist['epoch'].ffill()
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
return df_histe
except Exception as e:
print(e)
def plot_hist(trainer: pl.Trainer):
"""If you used a CSVLogger you can load and plot history here"""
try:
df_hist = pd.read_csv(trainer.logger.experiment.metrics_file_path)
df_hist['epoch'] = df_hist['epoch'].ffill()
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
df_histe = read_hist(trainer)
if len(df_histe)>1:
p = hv.Curve(df_histe, kdims=['epoch'], vdims=['loss/train']).relabel('train')
p *= hv.Curve(df_histe, kdims=['epoch'], vdims=['loss/val']).relabel('val')
@@ -235,14 +246,17 @@ def df_bold_min(data):
return pd.DataFrame(np.where(is_min, attr, ''),
index=data.index, columns=data.columns)
def format_results(results, metric=None):
def format_results(results, metric=None, sort=True):
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()}).T
if metric:
return df_results.xs(metric, axis=1, level=1).rename_axis(columns=metric)
df_results = df_results.xs(metric, axis=1, level=1).rename_axis(columns=metric)
if sort is True:
df_results['mean(e-e_baseline)'] = (df_results - df_results.T.BaselineMean).mean(1)
df_results = df_results.sort_values('mean(e-e_baseline)')
return df_results
def display_results(results, metric='nll', strformat="{:.2f}"):
df_results = format_results(results, metric=metric)
def display_results(results, metric='nll', strformat="{:.2f}", sort=True):
df_results = format_results(results, metric=metric, sort=sort)
# display metric
display(df_results
@@ -259,27 +273,29 @@ def display_results(results, metric='nll', strformat="{:.2f}"):
# Some such as MetroInterstateTraffic are easier, some are periodic such as BejingPM25, some are conditional on inputs such as GasSensor, and some are noisy and periodic like IMOSCurrentsVel
from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction, BejingPM25, GasSensor, MetroInterstateTraffic
datasets = [MetroInterstateTraffic, BejingPM25, GasSensor, AppliancesEnergyPrediction, IMOSCurrentsVel]
datasets = [GasSensor, IMOSCurrentsVel, AppliancesEnergyPrediction, BejingPM25, MetroInterstateTraffic]
datasets
# View train, test, val splits
l = hv.Layout()
for dataset in datasets:
d = dataset(datasets_root)
# +
# # View train, test, val splits
# l = hv.Layout()
# for dataset in datasets:
# d = dataset(datasets_root)
p = dynspread(
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
cmap='red'))
p *= dynspread(
datashade(hv.Scatter(d.df_val[d.columns_target[0]]),
cmap='green'))
p *= dynspread(
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
cmap='blue'))
p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
display(p)
# p = dynspread(
# datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
# cmap='red'))
# p *= dynspread(
# datashade(hv.Scatter(d.df_val[d.columns_target[0]]),
# cmap='green'))
# p *= dynspread(
# datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
# cmap='blue'))
# p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
# display(p)
# -
# ## Lightning
#
@@ -329,8 +345,9 @@ class PL_MODEL(pl.LightningModule):
# -
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.loggers import CSVLogger, WandbLogger, TensorBoardLogger, TestTubeLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import LearningRateMonitor
# ## Models
@@ -352,58 +369,65 @@ def free_mem():
gc.collect()
torch.cuda.empty_cache()
gc.collect()
# -
# +
hidden_size = 16
# PARAMS: model
## Some datasets are easier, so we will vary the hidden size to predict overfitting
hidden_size={'IMOSCurrentsVel': 6, #?
'AppliancesEnergyPrediction': 6, # ?
'BejingPM25': 8, # OK
'GasSensor': 8, # OK
'MetroInterstateTraffic': 16 # OK
}
dropout=0.0
layers=6
nhead=2
models = [
lambda xs, ys: BaselineLast(),
lambda xs, ys: BaselineMean(),
lambda xs, ys: Transformer(xs,
# lambda xs, ys: BaselineLast(),
lambda xs, ys, hidden_size: BaselineMean(),
lambda xs, ys, hidden_size: Transformer(xs,
ys,
attention_dropout=dropout,
nhead=nhead*2,
nlayers=layers,
hidden_size=hidden_size),
lambda xs, ys:TransformerProcess(xs,
lambda xs, ys, hidden_size:TransformerProcess(xs,
ys, hidden_size=hidden_size, nhead=nhead,
latent_dim=hidden_size//2, dropout=dropout,
nlayers=layers),
lambda xs, ys:TCNSeq(xs, ys, hidden_size=hidden_size*2, nlayers=layers, dropout=dropout, kernel_size=2),
lambda xs, ys: RANP(xs,
lambda xs, ys, hidden_size:TCNSeq(xs, ys, hidden_size=hidden_size, nlayers=layers, dropout=dropout, kernel_size=2),
lambda xs, ys, hidden_size: RANP(xs,
ys, hidden_dim=hidden_size, dropout=dropout,
latent_dim=hidden_size//2, n_decoder_layers=layers),
lambda xs, ys: TransformerSeq2Seq(xs,
lambda xs, ys, hidden_size: TransformerSeq2Seq(xs,
ys,
hidden_size=hidden_size,
nhead=nhead,
nlayers=layers,
attention_dropout=dropout
),
lambda xs, ys: LSTM(xs,
lambda xs, ys, hidden_size: LSTM(xs,
ys,
hidden_size=hidden_size*2,
lstm_layers=layers,
hidden_size=hidden_size,
lstm_layers=layers//2,
lstm_dropout=dropout),
lambda xs, ys: LSTMSeq2Seq(xs,
lambda xs, ys, hidden_size: LSTMSeq2Seq(xs,
ys,
hidden_size=hidden_size,
lstm_layers=layers,
lstm_dropout=dropout),
lambda xs, ys: CrossAttention(xs,
lambda xs, ys, hidden_size: CrossAttention(xs,
ys,
hidden_size=hidden_size,),
lambda xs, ys: InceptionTimeSeq(xs,
lambda xs, ys, hidden_size: InceptionTimeSeq(xs,
ys,
layers=layers,
kernel_size=96,
layers=layers//2,
hidden_size=hidden_size,
bottleneck=hidden_size//4)
@@ -430,28 +454,36 @@ ys = y_future.shape[-1]
# summary of each model
sizes=[]
for m_fn in models:
pt_model = m_fn(xs, ys).eval().to(device)
pt_model = m_fn(xs, ys, 16).eval().to(device)
model_name = type(pt_model).__name__
with torch.no_grad():
df_summary, df_total = summary(pt_model, x_past, y_past, x_future, y_future, print_summary=False)
sizes.append(df_total.rename(columns={'Totals':model_name}))
df_model_sizes = pd.concat(sizes, 1).T
df_model_sizes.style.format(pd.io.formats.format.EngFormatter(use_eng_prefix=True))
# Human readable numbers
fmt=pd.io.formats.format.EngFormatter(use_eng_prefix=True)
df_model_sizes_hr = df_model_sizes.apply(lambda x:x.apply(fmt))
df_model_sizes_hr.to_markdown(open(f'../outputs/{timestamp}_models.md', 'w'))
df_model_sizes_hr
# -
# ## Train
from collections import defaultdict
from seq2seq_time.metrics import rmse, smape
results = defaultdict(dict)
max_iters=20000
tensorboard_dir = Path(f"../outputs/{timestamp}").resolve()
print(f'For tensorboard run:\ntensorboard --logdir="{tensorboard_dir}"')
# +
for Dataset in datasets:
results = defaultdict(dict)
for Dataset in tqdm(datasets, desc='datasets'):
dataset_name = Dataset.__name__
dataset = Dataset(datasets_root)
ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
@@ -473,34 +505,45 @@ for Dataset in datasets:
batch_size=batch_size,
num_workers=num_workers)
for m_fn in models:
for m_fn in tqdm(models, desc=f'models ({dataset_name})'):
try:
free_mem()
pt_model = m_fn(xs, ys)
pt_model = m_fn(xs, ys, hidden_size[dataset_name])
model_name = type(pt_model).__name__
print(dataset_name, model_name)
print(timestamp, dataset_name, model_name)
# Wrap in lightning
patience = 2
model = PL_MODEL(pt_model,
lr=1e-3, patience=patience,
lr=3e-4, patience=patience,
# weight_decay=4e-5
).to(device)
# Trainer
save_dir = f"../outputs/{timestamp}/{dataset_name}"
name =f'{model_name}'
trainer = pl.Trainer(
gpus=1,
# Training length
min_epochs=2,
max_epochs=100,
amp_level='O1',
precision=16,
gradient_clip_val=20,
terminate_on_nan=True,
limit_train_batches=max_iters//batch_size,
limit_val_batches=max_iters//batch_size//5,
logger=CSVLogger("../outputs", name=f'{timestamp}_{dataset_name}_{model_name}'),
# Misc
gradient_clip_val=20,
terminate_on_nan=True,
# GPU
gpus=1,
amp_level='O1',
precision=16,
# Logging
default_root_dir=save_dir,
logger=[
TestTubeLogger(name=name, save_dir=save_dir)
],
# Callbacks
callbacks=[
EarlyStopping(monitor='loss/val', patience=patience * 2),
LearningRateMonitor(logging_interval='epoch')
],
)
@@ -513,12 +556,7 @@ for Dataset in datasets:
device=device,
scaler=dataset.output_scaler)
# print(dataset_name, model_name)
# print(f'mean_NLL {ds_preds.nll.mean().item():2.2f}')
# loss = ds_preds.nll.mean().item()
# print(plot_hist(trainer))
# plot_performance(ds_preds)
# display(read_hist(trainer))
metrics = dict(
rmse=rmse(ds_preds.y_true, ds_preds.y_pred).item(),
@@ -526,9 +564,10 @@ for Dataset in datasets:
nll=ds_preds.nll.mean().item()
)
results[dataset_name][model_name] = metrics
display_results(results, 'nll')
display_results(results, 'nll', sort=False)
dset_to_nc(ds_preds, Path(trainer.logger.experiment.log_dir)/'ds_preds.nc')
pred_path = Path(trainer.logger.experiment[-1].log_dir)/'..'/'ds_preds.nc'
dset_to_nc(ds_preds, pred_path)
model.cpu()
except Exception as e:
logging.exception('failed to run model')
@@ -536,15 +575,28 @@ for Dataset in datasets:
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display(df_results)
# -
# # Leaderboard
print(f'Negative Log-Likelihood (NLL).\nover {window_future} steps')
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display_results(results, 'nll')
def results_html(results, metric='nll', strformat="{:.2f}"):
df_results = format_results(results, metric=metric)
f = f'../outputs/{timestamp}_leaderboard.html'
print('saved to', f)
df_results.to_html(f, float_format=lambda n:strformat.format(n))
f = f'../outputs/{timestamp}_leaderboard.md'
print(f)
df_results.round(2).to_markdown(open(f, 'w'))
results_html(results, 'nll')
# # Plots
#
# - TODO make legends smaller
# - TODO either many batches, or plot of X steps ahead
# +
@@ -553,32 +605,35 @@ ds_predss = defaultdict(dict)
for Dataset in datasets:
dataset_name = Dataset.__name__
for m_fn in models:
pt_model = m_fn(xs, ys)
pt_model = m_fn(xs, ys, hidden_size[dataset_name])
model_name = type(pt_model).__name__
save_dir = Path(f"../outputs")/timestamp/dataset_name/model_name
checkpoint_name = f"{timestamp}_{dataset_name}_{model_name}"
save_dir = Path(f"../outputs")/checkpoint_name
# Get latest checkpoint
fs = sorted(save_dir.glob("**/ds_preds.nc"))
if len(fs)>0:
ds_preds = xr.open_dataset(fs[-1])
ds_predss[dataset_name][model_name] = ds_preds
# -
data_i = 100
data_i = 300
# Plot mean of predictions
n = hv.Layout()
for dataset in ds_predss.keys():
d = next(iter(ds_predss[dataset].values())).isel(t_source=data_i)
p = hv_plot_true(d)
for model in results[dataset].keys():
for model in ds_predss[dataset].keys():
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_source=data_i)
p *= hv_plot_pred(d).relabel(label=f"{model}")
n += p.opts(title=dataset, legend_position='top_left')
n.cols(1).opts(shared_axes=False)
dataset='BejingPM25'
1/0
dataset='IMOSCurrentsVel'
data_i=844
n = hv.Layout()
for i, model in enumerate(ds_predss[dataset].keys()):
ds_preds = ds_predss[dataset][model]
@@ -589,68 +644,122 @@ for i, model in enumerate(ds_predss[dataset].keys()):
n += p.opts(title=f'{dataset} {model}', legend_position='top_left')
n.cols(1)
plot_performance(ds_preds, full=True)
def plot_at_i(data_i):
d = ds_preds.isel(t_source=data_i)
return hv_plot_prediction(d).relabel(label=f"{model}")
dmap = hv.DynamicMap(plot_at_i, kdims=['t_source'])
t = ds_preds.t_source.values
dmap = dmap.redim.values(t_source=range(len(t)))
dmap.opts(framewise=True)
# Plot series of predictions
t_ahead_i=6
d = ds_preds.isel(t_ahead=t_ahead_i)
p = datashade(hv.Scatter({
'x': d.t_target,
'y': d.y_true
}, label='true').opts(color='black'), cmap='black')
p *= datashade(hv.Curve({'x': d.t_target, 'y':d.y_pred}), cmap='blue')
p.opts(title=f'ahead by {d.freq} * {t_ahead_i}')
# # DEBUG
# +
# # Summarize each models weights, and sanity check them
# dataset = datasets[0](datasets_root)
# ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
# window_future=window_future)
# dl_val = DataLoader(ds_val, batch_size=batch_size)
# x_past, y_past, x_future, y_future = next(iter(dl_val))
# xs = x_past.shape[-1]
# ys = y_future.shape[-1]
# plot_performance(ds_preds, full=True)
# for m_fn in models:
# +
def plot_at_i(time_i, dataset, model):
d = ds_predss[dataset][model].isel(t_source=time_i)
return hv_plot_prediction(d).relabel(label=f"{model}")
# pt_model = m_fn(xs, ys)
dmap = hv.DynamicMap(plot_at_i, kdims=['t_source', 'dataset', 'model'])
t = ds_preds.t_source.values
models = list(next(iter(ds_predss.values())).keys())
dmap = dmap.redim.values(
t_source=range(len(t)),
dataset=list(ds_predss.keys()),
model=models,
)
dmap.opts(framewise=True)
# -
# # Wrap in lightning
# model = PL_MODEL(pt_model)
1/0
# # Trainer
# free_mem()
# trainer = pl.Trainer(
# gpus=1,
# fast_dev_run=True,
# gradient_clip_val=4,
# progress_bar_refresh_rate=0,
# )
# # Train
# trainer.fit(model, dl_val)
# +
# Explore predictions with dynamic map
def plot_predictions_ahead(dataset='IMOSCurrentsVel', t_ahead_i=6, start=0, window_steps=1800):
d = next(iter(ds_predss[dataset].values())).isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
p = hv.Scatter({
'x': d.t_target,
'y': d.y_true
}, label='true').opts(color='black', framewise=True)
for model in results[dataset].keys():
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
p *= hv.Curve({'x': d.t_target, 'y':d.y_pred}, label=model).relabel(label=f"{model}")
p = p.opts(title=f"Dataset: {dataset}, {d.freq}*{t_ahead_i} ahead", height=250, legend_position='top', ylabel=d.targets)
return p.opts(framewise=True)
dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 't_ahead_i', 'start', 'window_steps'])
dmap = dmap.redim.values(dataset=list(ds_predss.keys()))
dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
dmap = dmap.redim.default(t_ahead_i=10, window_steps=800)
dmap
# -
1/0
# +
# Explore predictions with dynamic map
def plot_predictions_ahead(dataset='IMOSCurrentsVel', model='', t_ahead_i=6, start=0, window_steps=1800):
d = next(iter(ds_predss[dataset].values())).isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
p = hv.Scatter({
'x': d.t_target,
'y': d.y_true
}, label='true').opts(color='black', framewise=True)
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
x = d.t_target
y = d.y_pred
s = d.y_pred_std
p *= hv.Curve({'x': x, 'y':y}, label=model).relabel(label=f"{model}")
p *= hv.Spread((x, y, s * 2),
label='2*std').opts(alpha=0.5, line_width=0)
p = p.opts(title=f"Dataset: {dataset}, model={model}, {d.freq}*{t_ahead_i} ahead", height=250, legend_position='top', ylabel=d.targets)
return p.opts(framewise=True)
dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 'model', 't_ahead_i', 'start', 'window_steps'])
dmap = dmap.redim.values(dataset=list(ds_predss.keys()), model=models)
dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
dmap = dmap.redim.default(t_ahead_i=10, window_steps=1000)
dmap
# -
# +
# Explore predictions with dynamic map
def plot_predictions_ahead(dataset='IMOSCurrentsVel', t_ahead_i=6, start=0, window_steps=1800):
ds_preds = ds_predss[dataset]
l = hv.Layout()
for model in ds_preds.keys():
d = ds_preds[model].isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
x = d.t_target
y = d.y_pred
s = d.y_pred_std
p = hv.Scatter({
'x': d.t_target,
'y': d.y_true
}).opts(color='black')
p *= hv.Curve({'x': x, 'y':y})
p *= hv.Spread((x, y, s * 2)).opts(alpha=0.5, line_width=0)
l += p.opts(title=f"Dataset: {dataset}, model={model}, {d.freq}*{t_ahead_i} ahead", height=250, legend_position='top', ylabel=d.targets)
return l.cols(1)
dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 't_ahead_i', 'start', 'window_steps'])
dmap = dmap.redim.values(dataset=list(ds_predss.keys()))
dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
dmap = dmap.redim.default(t_ahead_i=10, window_steps=400)
dmap
# -
+5 -6
View File
@@ -8,10 +8,9 @@ class CrossAttention(nn.Module):
"""
A single transformer, using cross attention, like in the determistic encoder in attentive neural processes.
"""
def __init__(self, x_dim, y_dim, attention_dropout=0, nhead=8, nlayers=8, hidden_size=32, nan_value=0, min_std=0.01):
def __init__(self, x_dim, y_dim, attention_dropout=0, nhead=8, nlayers=8, hidden_size=32, min_std=0.01):
super().__init__()
self._min_std = min_std
self.nan_value = nan_value
enc_x_dim = x_dim + y_dim
self.v_encoder = nn.Linear(enc_x_dim, hidden_size)
@@ -59,10 +58,10 @@ class CrossAttention(nn.Module):
q = self.q_encoder(future_x).permute(1, 0, 2)
v = self.v_encoder(context).permute(1, 0, 2)
# Self attention with causal mask
v = self.self_attn_v(v, v, v, attn_mask=past_causal_mask)[0]
q = self.self_attn_q(q, q, q, attn_mask=future_causal_mask)[0]
k = self.self_attn_k(k, k, k, attn_mask=past_causal_mask)[0]
# # Self attention with causal mask
# v = self.self_attn_v(v, v, v, attn_mask=past_causal_mask)[0]
# q = self.self_attn_q(q, q, q, attn_mask=future_causal_mask)[0]
# k = self.self_attn_k(k, k, k, attn_mask=past_causal_mask)[0]
# Cross attention
h = self.cross_attn(query=q, key=k, value=v)[0]