This commit is contained in:
wassname
2020-10-27 06:43:50 +08:00
parent 6eda47b76f
commit 052fd6596c
9 changed files with 2920 additions and 21 deletions
+1
View File
@@ -1,6 +1,7 @@
lightning_logs/
dataset_folder/
logs/
outputs/
# Byte-compiled / optimized / DLL files
__pycache__/
File diff suppressed because one or more lines are too long
+409
View File
@@ -0,0 +1,409 @@
# -*- coding: utf-8 -*-
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:light
# text_representation:
# extension: .py
# format_name: light
# format_version: '1.5'
# jupytext_version: 1.6.0
# kernelspec:
# display_name: seq2seq-time
# language: python
# name: seq2seq-time
# ---
# # Sequence to Sequence Models for Timeseries Regression
#
#
# In this notebook we are going to tackle a harder problem:
# - predicting the future on a timeseries
# - using an LSTM
# - with rough uncertainty (uncalibrated)
# - outputing sequence of predictions
#
# <img src="../reports/figures/Seq2Seq for regression.png" />
#
#
# https://medium.com/@boitemailjeanmid/smart-meters-in-london-part1-description-and-first-insights-jean-michel-d-db97af2de71b
#
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
# %load_ext autoreload
# %autoreload 2
# %aimport -pandas
# %aimport -torch
# %aimport -numpy
# %aimport -matplotlib
# %aimport -dask
# %aimport -tqdm
# %matplotlib inline
# +
# Imports
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
import torch
import torch.utils.data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12.0, 3.0)
plt.style.use('ggplot')
from pathlib import Path
from tqdm.auto import tqdm
import pytorch_lightning as pl
# -
import warnings
warnings.simplefilter('once')
from seq2seq_time.data.dataset import Seq2SeqDataSet, Seq2SeqDataSets
from seq2seq_time.predict import predict, predict_multi
from seq2seq_time.util import dset_to_nc
import logging, sys
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
import holoviews as hv
from holoviews import opts
from holoviews.operation.datashader import datashade, dynspread
hv.extension('bokeh')
# ## Parameters
# +
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f'using {device}')
columns_target=['energy(kWh/hh)']
window_past = 48*2
window_future = 48*2
batch_size = 128
num_workers = 5
freq = '30T'
max_rows = 5e5
datasets_root = Path('../data/processed/')
# -
# ## Plot helpers
# +
def plot_prediction(ds_preds, i):
"""Plot a prediction into the future, at a single point in time."""
d = ds_preds.isel(t_source=i)
# Get arrays
xf = d.t_target
yp = d.y_pred
s = d.y_pred_std
yt = d.y_true
now = d.t_source.squeeze()
plt.figure(figsize=(12, 4))
plt.scatter(xf, yt, label='true', c='k', s=6)
ylim = plt.ylim()
# plot prediction
plt.fill_between(xf, yp-2*s, yp+2*s, alpha=0.25,
facecolor="b",
interpolate=True,
label="2 std",)
plt.plot(xf, yp, label='pred', c='b')
# plot true
plt.scatter(
d.t_past,
d.y_past,
c='k',
s=6
)
# plot a red line for now
plt.vlines(x=now, ymin=0, ymax=1, label='now', color='r')
plt.ylim(*ylim)
now=pd.Timestamp(now.values)
plt.title(f'Prediction NLL={d.nll.mean().item():2.2g}')
plt.xlabel(f'{now.date()}')
plt.ylabel('energy(kWh/hh)')
plt.legend()
plt.xticks(rotation=45)
plt.show()
def plot_performance(ds_preds, full=False):
"""Multiple plots using xr_preds"""
plot_prediction(ds_preds, 24)
ds_preds.mean('t_source').plot.scatter('t_ahead_hours', 'nll') # Mean over all predictions
n = len(ds_preds.t_source)
plt.ylabel('Negative Log Likelihood (lower is better)')
plt.xlabel('Hours ahead')
plt.title(f'NLL vs time ahead (no. samples={n})')
plt.show()
# Make a plot of the NLL over time. Does this solution get worse with time?
if full:
d = ds_preds.mean('t_ahead').groupby('t_source').mean().plot.scatter('t_source', 'nll')
plt.xticks(rotation=45)
plt.title('NLL over source time (lower is better)')
plt.show()
# A scatter plot is easy with xarray
if full:
plt.figure(figsize=(5, 5))
ds_preds.plot.scatter('y_true', 'y_pred', s=.01)
plt.show()
# -
def plot_hist(trainer):
try:
df_hist = pd.read_csv(trainer.logger.experiment.metrics_file_path)
df_hist['epoch'] = df_hist['epoch'].ffill()
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
if len(df_histe)>1:
df_histe[['loss/train', 'loss/val']].plot(title='history')
return df_histe
except Exception:
pass
# ## Lightning
# +
import pytorch_lightning as pl
class PL_MODEL(pl.LightningModule):
def __init__(self, model, lr=3e-4, patience=None, weight_decay=0):
super().__init__()
self._model = model
self.lr = lr
self.patience = patience
self.weight_decay = weight_decay
def forward(self, x_past, y_past, x_future, y_future=None):
"""Eval/Predict"""
y_dist, extra = self._model(x_past, y_past, x_future, y_future)
assert torch.isfinite(y_dist.loc).all(), 'output should be finite'
return y_dist, extra
def training_step(self, batch, batch_idx, phase='train'):
x_past, y_past, x_future, y_future = batch
y_dist, extra = self.forward(*batch)
loss = -y_dist.log_prob(y_future).mean()
assert torch.isfinite(loss).all(), 'loss should be finite'
self.log_dict({f'loss/{phase}':loss})
if ('loss' in extra) and (phase=='train'):
# some models have a special loss
loss = extra['loss']
self.log_dict({f'model_loss/{phase}':loss})
return loss
def validation_step(self, batch, batch_idx):
return self.training_step(batch, batch_idx, phase='val')
def configure_optimizers(self):
optim = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
optim,
patience=self.patience,
verbose=True,
min_lr=1e-7,
) if self.patience else None
return {'optimizer': optim, 'lr_scheduler': scheduler, 'monitor': 'loss/val'}
# -
# # Run
from torch.utils.data import DataLoader
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
# Models
from seq2seq_time.models.lstm_seq2seq import LSTMSeq2Seq
from seq2seq_time.models.lstm_seq import LSTMSeq
from seq2seq_time.models.lstm import LSTM
from seq2seq_time.models.baseline import BaselineLast
from seq2seq_time.models.transformer import Transformer
from seq2seq_time.models.transformer_autor import TransformerAutoR
from seq2seq_time.models.transformer_seq2seq import TransformerSeq2Seq
from seq2seq_time.models.transformer_seq import TransformerSeq
from seq2seq_time.models.neural_process import RANP
from seq2seq_time.models.transformer_process import TransformerProcess
from seq2seq_time.models.tcn import TemporalConvNet
# ## Plots
# +
import gc
def free_mem():
gc.collect()
torch.cuda.empty_cache()
gc.collect()
# -
models = [
lambda: BaselineLast(),
# lambda: TransformerAutoR(input_size,
# output_size, hidden_out_size=32),
lambda: RANP(input_size,
output_size, hidden_dim=32,
latent_dim=64, n_decoder_layers=4),
lambda: LSTM(input_size,
output_size,
hidden_size=80,
lstm_layers=3,
lstm_dropout=0.3),
lambda: LSTMSeq2Seq(input_size,
output_size,
hidden_size=64,
lstm_layers=2,
lstm_dropout=0.25),
lambda: TransformerSeq2Seq(input_size,
output_size,
hidden_size=128,
nhead=8,
nlayers=4,
attention_dropout=0.2),
lambda: Transformer(input_size,
output_size,
attention_dropout=0.2,
nhead=8,
nlayers=8,
hidden_size=128),
lambda :TransformerProcess(input_size,
output_size, hidden_size=16,
latent_dim=8, dropout=0.5,
nlayers=4,)
# lambda :TemporalConvNet()
]
# models
# +
from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction, BejingPM25, GasSensor, MetroInterstateTraffic
datasets = [IMOSCurrentsVel, BejingPM25, GasSensor, AppliancesEnergyPrediction, MetroInterstateTraffic]
datasets
# +
# GasSensor(datasets_root)
# -
# ## Train
from collections import defaultdict
results = defaultdict(dict)
from seq2seq_time.metrics import rmse, smape
# +
for Dataset in datasets:
dataset_name = Dataset.__name__
dataset = Dataset(datasets_root)
ds_train, ds_test = dataset.to_datasets(window_past=window_past,
window_future=window_future)
# Init data
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
input_size = x_past.shape[-1]
output_size = y_future.shape[-1]
# Loaders
dl_train = DataLoader(ds_train,
batch_size=batch_size,
shuffle=True,
pin_memory=num_workers == 0,
num_workers=num_workers)
dl_test = DataLoader(ds_test,
batch_size=batch_size,
num_workers=num_workers)
for m_fn in models:
try:
free_mem()
pt_model = m_fn()
model_name = type(pt_model).__name__
print(dataset_name, model_name)
# Wrap in lightning
patience = 2
model = PL_MODEL(pt_model,
lr=3e-4, patience=patience,
weight_decay=1e-5).to(device)
# Trainer
trainer = pl.Trainer(
gpus=1,
min_epochs=2,
max_epochs=20,
amp_level='O1',
precision=16,
limit_train_batches=1000,
limit_val_batches=100,
logger=CSVLogger("../outputs", name=f'{dataset_name}_{model_name}'),
callbacks=[
EarlyStopping(monitor='loss/val', patience=patience * 2, verbose=True),
],
)
# Train
trainer.fit(model, dl_train, dl_test)
ds_preds = predict(model.to(device),
ds_test,
batch_size * 2,
device=device,
scaler=dataset.output_scaler)
print(dataset_name, model_name)
print(f'mean_NLL {ds_preds.nll.mean().item():2.2f}')
loss = ds_preds.nll.mean().item()
# Performance
# print(plot_hist(trainer))
# plot_performance(ds_preds)
metrics = dict(
rmse=rmse(ds_preds.y_true, ds_preds.y_pred).item(),
smape=smape(ds_preds.y_true, ds_preds.y_pred).item(),
nll=ds_preds.nll.mean().item()
)
results[dataset_name][model_name] = metrics
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display(df_results)
dset_to_nc(ds_preds, Path(trainer.logger.experiment.log_dir)/'ds_preds.nc')
model.cpu()
except Exception as e:
logging.exception('failed to run model')
df_results = pd.concat({k:pd.DataFrame(v) for k,v in results.items()})
display(df_results)
# +
# EarlyStopping?
# +
# ds_preds.to_netcdf(trainer.logger.experiment.log_dir+'/ds_preds2.nc')
# -
# # Plots
+14 -18
View File
@@ -7,9 +7,11 @@ from sklearn_pandas import DataFrameMapper
import xarray as xr
import pandas as pd
import numpy as np
import zipfile
from .dataset import Seq2SeqDataSet
from .util import normalize_encode_dataframe, timeseries_split
from ..util import dset_to_nc
from .tidal import generate_tidal_periods
@@ -77,20 +79,18 @@ class GasSensor(RegressionForecastData):
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00487/gas-sensor-array-temperature-modulation.zip'
# download if needed
extract_path = self.datasets_root/'GasSensor'
files = sorted(extract_path.glob('*.csv'))
if len(files)<13:
print('download_and_extract_archive')
download_and_extract_archive(url, self.datasets_root, extract_path)
# extract_path = self.datasets_root/'gas-sensor-array-temperature-modulation.zip'
download_url(url, self.datasets_root)
# Load csv's
files = sorted(extract_path.glob('*.csv'))
dfs = []
for f in files:
now = pd.to_datetime(f.stem, format='%Y%m%d_%H%M%S')
df = pd.read_csv(f)
df.index = pd.to_timedelta(df['Time (s)'], unit='s') + now
dfs.append(df)
# Load csv's from inside zip
zf = zipfile.ZipFile(self.datasets_root / 'gas-sensor-array-temperature-modulation.zip')
dfs=[]
for f in zf.namelist():
if f.endswith('.csv'):
now = pd.to_datetime(Pdset_to_ncath(f).stem, format='%Y%m%d_%H%M%S')
df = pd.read_csv(zf.open(f))
df.index = pd.to_timedelta(df['Time (s)'], unit='s') + now
dfs.append(df)
self.df = pd.concat(dfs).dropna(subset=self.columns_target)
df = df[[ 'CO (ppm)', 'Humidity (%r.h.)', 'Temperature (C)',
@@ -272,11 +272,7 @@ def get_current_timeseries(
# Add tidal freqs
xd = xd.merge(df_eta)
# Cache to nc
xd.to_netcdf(outfile)
print(
f'wrote "{outfile}" with size {outfile.stat().st_size*1e-6:2.2f} MB'
)
dset_to_nc(xd, outfile)
return outfile
+2 -2
View File
@@ -31,7 +31,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
assert df.index.freq is not None, 'should have freq'
assert_no_objects(df)
self.df = df.dropna(subset=columns_target)
self.df = df.dropna(subset=columns_target).ffill()
self.window_past = window_past
self.window_future = window_future
@@ -100,7 +100,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
def __repr__(self):
t = self.df.index
return f'<{type(self).__name__}(shape={self.df.shape}, times={t[0]} to {t[1]} at {t.freq.freqstr})>'
return f'<{type(self).__name__}(shape={self.df.shape}, times={t[0]} to {t[1]})>'
class Seq2SeqDataSets(torch.utils.data.Dataset):
+1 -1
View File
@@ -16,4 +16,4 @@ def normalize_encode_dataframe(df, encoder=OrdinalEncoder):
def timeseries_split(df, test_fraction=0.2):
"""Split timeseries data with test in the future"""
i = int(len(df)*test_fraction)
return df.iloc[:i], df.iloc[i:]
return df.iloc[:-i], df.iloc[-i:]
+23
View File
@@ -0,0 +1,23 @@
import numpy as np
EPSILON = 1e-10
def _error(actual: np.ndarray, predicted: np.ndarray):
""" Simple error """
return actual - predicted
def mse(actual: np.ndarray, predicted: np.ndarray):
""" Mean Squared Error """
return np.mean(np.square(_error(actual, predicted)))
def rmse(actual: np.ndarray, predicted: np.ndarray):
""" Root Mean Squared Error """
return np.sqrt(mse(actual, predicted))
def smape(actual: np.ndarray, predicted: np.ndarray):
"""
Symmetric Mean Absolute Percentage Error
Note: result is NOT multiplied by 100
"""
return np.mean(2.0 * np.abs(actual - predicted) / ((np.abs(actual) + np.abs(predicted)) + EPSILON))
+146
View File
@@ -0,0 +1,146 @@
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
class Chomp1d(nn.Module):
def __init__(self, chomp_size):
super(Chomp1d, self).__init__()
self.chomp_size = chomp_size
def forward(self, x):
return x[:, :, : -self.chomp_size].contiguous()
class Conv(nn.Module):
"""Causal convolution layer."""
def __init__(
self,
n_inputs,
n_outputs,
kernel_size,
stride,
dilation,
padding,
causal=True,
):
super().__init__()
self.conv = nn.Conv1d(
n_inputs,
n_outputs,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
)
self.chomp = Chomp1d(padding)
self.causal = causal
def forward(self, x):
out = self.conv(x)
if self.causal:
out = self.chomp(out)
return out
class TemporalBlock(nn.Module):
def __init__(
self,
n_inputs,
n_outputs,
kernel_size,
stride,
dilation,
padding,
dropout=0.2,
):
super(TemporalBlock, self).__init__()
self.conv1 = Conv(
n_inputs,
n_outputs,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
)
self.relu1 = nn.ReLU()
self.dropout1 = nn.Dropout(dropout)
self.conv2 = Conv(
n_outputs,
n_outputs,
kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
)
self.relu2 = nn.ReLU()
self.dropout2 = nn.Dropout(dropout)
self.net = nn.Sequential(
self.conv1, self.relu1, self.dropout1, self.conv2, self.relu2, self.dropout2
)
self.downsample = (
Conv(
n_inputs,
n_outputs,
1,
stride=1,
padding=0,
dilation=1,
causal=False,
)
if n_inputs != n_outputs
else None
)
self.relu = nn.ReLU()
def forward(self, x):
out = x
for i, l in enumerate(self.net):
out = l(out)
res = x if self.downsample is None else self.downsample(x)
return self.relu(out + res)
class TemporalConvNet(nn.Module):
"""
See:
- https://arxiv.org/pdf/1803.01271.pdf
- https://github.com/locuslab/TCN
"""
def __init__(
self,
num_inputs,
num_channels,
num_embeddings=0,
kernel_size=2,
dropout=0.2,
embedding_dim=2,
):
super(TemporalConvNet, self).__init__()
layers = []
num_levels = len(num_channels)
for i in range(num_levels):
dilation_size = 2 ** i
in_channels = num_inputs if i == 0 else num_channels[i - 1]
out_channels = num_channels[i]
layers += [
TemporalBlock(
in_channels,
out_channels,
kernel_size,
stride=1,
dilation=dilation_size,
padding=(kernel_size - 1) * dilation_size,
dropout=dropout,
)
]
self.network = nn.Sequential(*layers)
def forward(self, x):
out = x
for l in self.network:
out = l(out)
return out
+11
View File
@@ -1,6 +1,9 @@
from pathlib import Path
import torch
import xarray as xr
import logging
logger = logging.getLogger(__file__)
project_dir = Path(__file__).parent.parent
def to_numpy(x):
@@ -12,3 +15,11 @@ def to_numpy(x):
def mask_upper_triangular(N, device):
"""Causal attention."""
return torch.triu(torch.ones(N, N), diagonal=1).to(device).bool()
def dset_to_nc(dset, f, engine="netcdf4", compression={"zlib": True}):
if isinstance(dset, xr.DataArray):
dset = dset.to_dataset(name="data")
encoding = {k: {"zlib": True} for k in dset.data_vars}
logger.info(f"saving to {f}")
dset.to_netcdf(f, engine=engine, encoding=encoding)
logger.info(f"Wrote {f.stem}.nc size={f.stat().st_size/1e6} M")