mirror of
https://github.com/wassname/seq2seq-time.git
synced 2026-06-27 18:06:49 +08:00
misc
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -1,725 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# ---
|
|
||||||
# jupyter:
|
|
||||||
# jupytext:
|
|
||||||
# formats: ipynb,py:light
|
|
||||||
# text_representation:
|
|
||||||
# extension: .py
|
|
||||||
# format_name: light
|
|
||||||
# format_version: '1.5'
|
|
||||||
# jupytext_version: 1.6.0
|
|
||||||
# kernelspec:
|
|
||||||
# display_name: seq2seq-time
|
|
||||||
# language: python
|
|
||||||
# name: seq2seq-time
|
|
||||||
# ---
|
|
||||||
|
|
||||||
# # Sequence to Sequence Models for Timeseries Regression
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# In this notebook we are going to tackle a harder problem:
|
|
||||||
# - predicting the future on a timeseries
|
|
||||||
# - using an LSTM
|
|
||||||
# - with rough uncertainty (uncalibrated)
|
|
||||||
# - outputing sequence of predictions
|
|
||||||
#
|
|
||||||
# <img src="../reports/figures/Seq2Seq for regression.png" />
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# https://medium.com/@boitemailjeanmid/smart-meters-in-london-part1-description-and-first-insights-jean-michel-d-db97af2de71b
|
|
||||||
#
|
|
||||||
|
|
||||||
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
|
|
||||||
# %load_ext autoreload
|
|
||||||
# %autoreload 2
|
|
||||||
# %aimport -pandas
|
|
||||||
# %aimport -torch
|
|
||||||
# %aimport -numpy
|
|
||||||
# %aimport -matplotlib
|
|
||||||
# %aimport -dask
|
|
||||||
# %aimport -tqdm
|
|
||||||
# %matplotlib inline
|
|
||||||
|
|
||||||
# +
|
|
||||||
# Imports
|
|
||||||
import torch
|
|
||||||
from torch import nn, optim
|
|
||||||
from torch.nn import functional as F
|
|
||||||
from torch.autograd import Variable
|
|
||||||
import torch
|
|
||||||
import torch.utils.data
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
plt.rcParams['figure.figsize'] = (12.0, 3.0)
|
|
||||||
plt.style.use('ggplot')
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from tqdm.auto import tqdm
|
|
||||||
|
|
||||||
import pytorch_lightning as pl
|
|
||||||
# -
|
|
||||||
|
|
||||||
import warnings
|
|
||||||
warnings.simplefilter('once')
|
|
||||||
|
|
||||||
from seq2seq_time.data.dataset import Seq2SeqDataSet, Seq2SeqDataSets
|
|
||||||
from seq2seq_time.predict import predict, predict_multi
|
|
||||||
|
|
||||||
import logging, sys
|
|
||||||
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
|
||||||
|
|
||||||
# ## Parameters
|
|
||||||
|
|
||||||
# +
|
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
||||||
print(f'using {device}')
|
|
||||||
|
|
||||||
columns_target=['energy(kWh/hh)']
|
|
||||||
window_past = 48*2
|
|
||||||
window_future = 48*2
|
|
||||||
batch_size = 256
|
|
||||||
num_workers = 5
|
|
||||||
freq = '30T'
|
|
||||||
max_rows = 5e5
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
# ## Load data
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
def get_smartmeter_df(indir=Path('../data/raw/smart-meters-in-london'), max_files=8):
|
|
||||||
"""
|
|
||||||
Data loading and cleanding is always messy, so understand this code is optional.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Load csv files
|
|
||||||
csv_files = sorted((indir/'halfhourly_dataset').glob('*.csv'))[:max_files]
|
|
||||||
|
|
||||||
dfs = []
|
|
||||||
for f in csv_files:
|
|
||||||
df = (pd.read_csv(f, parse_dates=[1], na_values=['Null'])
|
|
||||||
.groupby('tstp')
|
|
||||||
.sum()
|
|
||||||
.sort_index()
|
|
||||||
)
|
|
||||||
df['block'] = f.stem
|
|
||||||
|
|
||||||
# Drop nan and 0's
|
|
||||||
df = df[df['energy(kWh/hh)']!=0]
|
|
||||||
df = df.dropna()
|
|
||||||
|
|
||||||
# Add time features
|
|
||||||
time = df.index.to_series()
|
|
||||||
df["month"] = time.dt.month
|
|
||||||
df['day'] = time.dt.day
|
|
||||||
df['week'] = time.dt.week
|
|
||||||
df['hour'] = time.dt.hour
|
|
||||||
df['minute'] = time.dt.minute
|
|
||||||
df['dayofweek'] = time.dt.dayofweek
|
|
||||||
|
|
||||||
# Load weather data
|
|
||||||
df_weather = pd.read_csv(indir/'weather_hourly_darksky.csv', parse_dates=[3])
|
|
||||||
use_cols = ['visibility', 'windBearing', 'temperature', 'time', 'dewPoint',
|
|
||||||
'pressure', 'apparentTemperature', 'windSpeed',
|
|
||||||
'humidity']
|
|
||||||
df_weather = df_weather[use_cols].set_index('time')
|
|
||||||
|
|
||||||
# Resample to match energy data
|
|
||||||
# Use first, since we have bearing, and you can't take mean
|
|
||||||
df_weather = df_weather.resample(freq).first().ffill()
|
|
||||||
|
|
||||||
# Join weather and energy data
|
|
||||||
df = pd.merge(df, df_weather, how='inner', left_index=True, right_index=True, sort=True)
|
|
||||||
|
|
||||||
# Holidays
|
|
||||||
df_hols = pd.read_csv(indir/'uk_bank_holidays.csv', parse_dates=[0])
|
|
||||||
holidays = set(df_hols['Bank holidays'].dt.round('D'))
|
|
||||||
def is_holiday(dt):
|
|
||||||
return dt in holidays
|
|
||||||
days = df.index.floor('D')
|
|
||||||
holiday_mapping = days.unique().to_series().apply(is_holiday).astype(int).to_dict()
|
|
||||||
df['holiday'] = days.to_series().map(holiday_mapping).values
|
|
||||||
|
|
||||||
# sort
|
|
||||||
df.index.name = 'Date'
|
|
||||||
df = df.loc['2012-09':] # Weird value before this
|
|
||||||
|
|
||||||
dfs.append(df)
|
|
||||||
|
|
||||||
return pd.concat(dfs)
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
# Our dataset is the london smartmeter data. But at half hour intervals
|
|
||||||
|
|
||||||
# +
|
|
||||||
df = get_smartmeter_df(max_files=12)
|
|
||||||
|
|
||||||
# # Just get the first one for now
|
|
||||||
# dfs = list(dfs)
|
|
||||||
|
|
||||||
# # df = df.resample(freq).first().dropna() # Where empty we will backfill, this will respect causality, and mostly maintain the mean
|
|
||||||
|
|
||||||
df = df.tail(int(max_rows)).copy() # Just use last X rows
|
|
||||||
# df = pd.concat(dfs[:6], 0)
|
|
||||||
# # df = dfs[0]
|
|
||||||
print(df.block.value_counts())
|
|
||||||
df
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ### Plot/explore
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
import holoviews as hv
|
|
||||||
from holoviews import opts
|
|
||||||
|
|
||||||
from holoviews.plotting.links import RangeToolLink
|
|
||||||
|
|
||||||
import datashader as ds
|
|
||||||
|
|
||||||
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize
|
|
||||||
from holoviews.operation import decimate
|
|
||||||
|
|
||||||
hv.extension('bokeh')
|
|
||||||
|
|
||||||
|
|
||||||
# def house_curve(Name=None):
|
|
||||||
# if isinstance(Name, int):
|
|
||||||
# name = df.block.unique()[Name]
|
|
||||||
# d = df[df.block == Name]
|
|
||||||
# d_curve = hv.Curve(d, 'Date', 'energy(kWh/hh)', label=Name).opts(framewise=True)
|
|
||||||
# return d_curve
|
|
||||||
|
|
||||||
|
|
||||||
# dmap = hv.DynamicMap(house_curve, kdims=['Name'])
|
|
||||||
# dmap = dmap.redim.values(Name=list(df.block.unique()))
|
|
||||||
# dynspread(datashade(dmap).opts(width=800,
|
|
||||||
# height=300,
|
|
||||||
# tools=['xwheel_zoom', 'pan'],
|
|
||||||
# active_tools=['xwheel_zoom', 'pan'],
|
|
||||||
# default_tools=['reset', 'save', 'hover']
|
|
||||||
# ))
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ### Profiling
|
|
||||||
|
|
||||||
# +
|
|
||||||
# from pandas_profiling import ProfileReport
|
|
||||||
# profile = ProfileReport(df, title="Pandas Profiling Report", minimal=True)
|
|
||||||
# profile
|
|
||||||
# -
|
|
||||||
|
|
||||||
# ### Norm
|
|
||||||
|
|
||||||
df.describe()
|
|
||||||
|
|
||||||
# +
|
|
||||||
import sklearn
|
|
||||||
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
|
|
||||||
from sklearn_pandas import DataFrameMapper
|
|
||||||
|
|
||||||
columns_input_numeric = list(df.drop(columns=columns_target)._get_numeric_data().columns)
|
|
||||||
columns_categorical = list(set(df.columns)-set(columns_input_numeric)-set(columns_target))
|
|
||||||
|
|
||||||
output_scalers = [([n], StandardScaler()) for n in columns_target]
|
|
||||||
transformers=output_scalers + \
|
|
||||||
[([n], StandardScaler()) for n in columns_input_numeric] + \
|
|
||||||
[([n], OrdinalEncoder()) for n in columns_categorical]
|
|
||||||
scaler = DataFrameMapper(transformers, df_out=True)
|
|
||||||
df_norm = scaler.fit_transform(df)
|
|
||||||
df_norm
|
|
||||||
# -
|
|
||||||
|
|
||||||
output_scaler = next(filter(lambda r:r[0][0] in columns_target, scaler.features))[-1]
|
|
||||||
output_scaler
|
|
||||||
|
|
||||||
# ### Split
|
|
||||||
|
|
||||||
# +
|
|
||||||
# split data, with the test in the future
|
|
||||||
|
|
||||||
d0 =df_norm.index.min()
|
|
||||||
d1 = df_norm.index.max()
|
|
||||||
split_time = d0+(d1-d0)*0.8
|
|
||||||
split_time = split_time.round('1D')
|
|
||||||
print(split_time)
|
|
||||||
df_train = df_norm.groupby('block').apply(lambda d:d.loc[:split_time]).reset_index(level=0, drop=True)
|
|
||||||
df_test = df_norm.groupby('block').apply(lambda d:d.loc[split_time:]).reset_index(level=0, drop=True)
|
|
||||||
# df_test
|
|
||||||
|
|
||||||
# +
|
|
||||||
# # Show split
|
|
||||||
# df_train['energy(kWh/hh)'].plot(label='train')
|
|
||||||
# df_test['energy(kWh/hh)'].plot(label='test')
|
|
||||||
# plt.ylabel('energy(kWh/hh)')
|
|
||||||
# plt.legend()
|
|
||||||
# -
|
|
||||||
|
|
||||||
# # Show split
|
|
||||||
scatter = dynspread(datashade(hv.Curve(df_train, kdims=['Date'], vdims=['energy(kWh/hh)', 'block']).groupby('block'), cmap='blue'))
|
|
||||||
scatter *= dynspread(datashade(hv.Curve(df_test, kdims=['Date'], vdims=['energy(kWh/hh)', 'block']).groupby('block'), cmap='red'))
|
|
||||||
scatter = scatter.opts(plot=dict(width=800))
|
|
||||||
scatter
|
|
||||||
|
|
||||||
# ### Dataset
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
# ### Dataset
|
|
||||||
# These are the columns that we wont know in the future
|
|
||||||
# We need to blank them out in x_future
|
|
||||||
columns_blank=['visibility',
|
|
||||||
'windBearing', 'temperature', 'dewPoint', 'pressure',
|
|
||||||
'apparentTemperature', 'windSpeed', 'humidity']
|
|
||||||
df_trains = [d.resample(freq).first().ffill().dropna() for _,d in df_train.groupby('block')]
|
|
||||||
df_tests = [d.resample(freq).first().ffill().dropna() for _,d in df_test.groupby('block')]
|
|
||||||
ds_train = Seq2SeqDataSets(df_trains,
|
|
||||||
window_past=window_past,
|
|
||||||
window_future=window_future,
|
|
||||||
columns_blank=columns_blank)
|
|
||||||
ds_test = Seq2SeqDataSets(df_tests,
|
|
||||||
window_past=window_past,
|
|
||||||
window_future=window_future,
|
|
||||||
columns_blank=columns_blank)
|
|
||||||
print(ds_train)
|
|
||||||
print(ds_test)
|
|
||||||
# -
|
|
||||||
# we can treat it like an array
|
|
||||||
ds_train[0]
|
|
||||||
len(ds_train)
|
|
||||||
ds_train[-1]
|
|
||||||
|
|
||||||
# +
|
|
||||||
# We can get rows
|
|
||||||
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
|
|
||||||
|
|
||||||
# Plot one instance, this is what the model sees
|
|
||||||
y_past['energy(kWh/hh)'].plot(label='past')
|
|
||||||
y_future['energy(kWh/hh)'].plot(ax=plt.gca(), label='future')
|
|
||||||
plt.legend()
|
|
||||||
plt.ylabel('energy(kWh/hh)')
|
|
||||||
|
|
||||||
# Notice we've added on two new columns tsp (time since present) and is_past
|
|
||||||
x_past.tail()
|
|
||||||
# -
|
|
||||||
|
|
||||||
# Notice we've hidden some future columns to prevent cheating
|
|
||||||
x_future.tail()
|
|
||||||
|
|
||||||
|
|
||||||
# ## Plot helpers
|
|
||||||
|
|
||||||
# +
|
|
||||||
def plot_prediction(ds_preds, i):
|
|
||||||
"""Plot a prediction into the future, at a single point in time."""
|
|
||||||
d = ds_preds.isel(t_source=i)
|
|
||||||
|
|
||||||
# Get arrays
|
|
||||||
xf = d.t_target
|
|
||||||
yp = d.y_pred
|
|
||||||
s = d.y_pred_std
|
|
||||||
yt = d.y_true
|
|
||||||
now = d.t_source.squeeze()
|
|
||||||
|
|
||||||
|
|
||||||
plt.figure(figsize=(12, 4))
|
|
||||||
|
|
||||||
plt.scatter(xf, yt, label='true', c='k', s=6)
|
|
||||||
ylim = plt.ylim()
|
|
||||||
|
|
||||||
# plot prediction
|
|
||||||
plt.fill_between(xf, yp-2*s, yp+2*s, alpha=0.25,
|
|
||||||
facecolor="b",
|
|
||||||
interpolate=True,
|
|
||||||
label="2 std",)
|
|
||||||
plt.plot(xf, yp, label='pred', c='b')
|
|
||||||
|
|
||||||
# plot true
|
|
||||||
plt.scatter(
|
|
||||||
d.t_past,
|
|
||||||
d.y_past,
|
|
||||||
c='k',
|
|
||||||
s=6
|
|
||||||
)
|
|
||||||
|
|
||||||
# plot a red line for now
|
|
||||||
plt.vlines(x=now, ymin=0, ymax=1, label='now', color='r')
|
|
||||||
plt.ylim(*ylim)
|
|
||||||
|
|
||||||
now=pd.Timestamp(now.values)
|
|
||||||
plt.title(f'Prediction NLL={d.nll.mean().item():2.2g}')
|
|
||||||
plt.xlabel(f'{now.date()}')
|
|
||||||
plt.ylabel('energy(kWh/hh)')
|
|
||||||
plt.legend()
|
|
||||||
plt.xticks(rotation=45)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
def plot_performance(ds_preds, full=False):
|
|
||||||
"""Multiple plots using xr_preds"""
|
|
||||||
plot_prediction(ds_preds, 24)
|
|
||||||
|
|
||||||
ds_preds.mean('t_source').plot.scatter('t_ahead_hours', 'nll') # Mean over all predictions
|
|
||||||
n = len(ds_preds.t_source)
|
|
||||||
plt.ylabel('Negative Log Likelihood (lower is better)')
|
|
||||||
plt.xlabel('Hours ahead')
|
|
||||||
plt.title(f'NLL vs time ahead (no. samples={n})')
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# Make a plot of the NLL over time. Does this solution get worse with time?
|
|
||||||
if full:
|
|
||||||
d = ds_preds.mean('t_ahead').groupby('t_source').mean().plot.scatter('t_source', 'nll')
|
|
||||||
plt.xticks(rotation=45)
|
|
||||||
plt.title('NLL over source time (lower is better)')
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# A scatter plot is easy with xarray
|
|
||||||
if full:
|
|
||||||
plt.figure(figsize=(5, 5))
|
|
||||||
ds_preds.plot.scatter('y_true', 'y_pred', s=.01)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def plot_hist(trainer):
|
|
||||||
try:
|
|
||||||
df_hist = pd.read_csv(trainer.logger.experiment.metrics_file_path)
|
|
||||||
df_hist['epoch'] = df_hist['epoch'].ffill()
|
|
||||||
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
|
|
||||||
if len(df_histe)>1:
|
|
||||||
df_histe[['loss/train', 'loss/val']].plot(title='history')
|
|
||||||
return df_histe
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# ## Lightning
|
|
||||||
|
|
||||||
# +
|
|
||||||
import pytorch_lightning as pl
|
|
||||||
|
|
||||||
class PL_MODEL(pl.LightningModule):
|
|
||||||
def __init__(self, model, lr=3e-4, patience=2):
|
|
||||||
super().__init__()
|
|
||||||
self._model = model
|
|
||||||
self.lr = lr
|
|
||||||
self.patience = patience
|
|
||||||
|
|
||||||
def forward(self, x_past, y_past, x_future, y_future=None):
|
|
||||||
"""Eval/Predict"""
|
|
||||||
y_dist, extra = self._model(x_past, y_past, x_future, y_future)
|
|
||||||
return y_dist, extra
|
|
||||||
|
|
||||||
def training_step(self, batch, batch_idx, phase='train'):
|
|
||||||
x_past, y_past, x_future, y_future = batch
|
|
||||||
y_dist, extra = self.forward(*batch)
|
|
||||||
loss = -y_dist.log_prob(y_future).mean()
|
|
||||||
self.log_dict({f'loss/{phase}':loss})
|
|
||||||
if ('loss' in extra) and (phase=='train'):
|
|
||||||
# some models have a special loss
|
|
||||||
loss = extra['loss']
|
|
||||||
self.log_dict({f'model_loss/{phase}':loss})
|
|
||||||
return loss
|
|
||||||
|
|
||||||
def validation_step(self, batch, batch_idx):
|
|
||||||
return self.training_step(batch, batch_idx, phase='val')
|
|
||||||
|
|
||||||
def configure_optimizers(self):
|
|
||||||
optim = torch.optim.Adam(self.parameters(), lr=self.lr)
|
|
||||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
|
||||||
optim,
|
|
||||||
patience=self.patience,
|
|
||||||
verbose=True,
|
|
||||||
min_lr=1e-7,
|
|
||||||
)
|
|
||||||
return {'optimizer': optim, 'lr_scheduler': scheduler, 'monitor': 'loss/val'}
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
# # Run
|
|
||||||
from torch.utils.data import DataLoader
|
|
||||||
from pytorch_lightning.loggers import CSVLogger
|
|
||||||
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
# Init data
|
|
||||||
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
|
|
||||||
input_size = x_past.shape[-1]
|
|
||||||
output_size = y_future.shape[-1]
|
|
||||||
|
|
||||||
dl_train = DataLoader(ds_train,
|
|
||||||
batch_size=batch_size,
|
|
||||||
shuffle=True,
|
|
||||||
pin_memory=num_workers==0,
|
|
||||||
num_workers=num_workers)
|
|
||||||
dl_test = DataLoader(ds_test, batch_size=batch_size, num_workers=num_workers)
|
|
||||||
# -
|
|
||||||
|
|
||||||
from seq2seq_time.models.lstm_seq2seq import LSTMSeq2Seq
|
|
||||||
from seq2seq_time.models.lstm_seq import LSTMSeq
|
|
||||||
from seq2seq_time.models.lstm import LSTM
|
|
||||||
from seq2seq_time.models.baseline import BaselineLast
|
|
||||||
from seq2seq_time.models.transformer import Transformer
|
|
||||||
from seq2seq_time.models.transformer_seq2seq import TransformerSeq2Seq
|
|
||||||
from seq2seq_time.models.transformer_seq import TransformerSeq
|
|
||||||
from seq2seq_time.models.neural_process import RANP
|
|
||||||
# ## Plots
|
|
||||||
# +
|
|
||||||
models = [
|
|
||||||
RANP(input_size,
|
|
||||||
output_size),
|
|
||||||
LSTM(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=80,
|
|
||||||
lstm_layers=3,
|
|
||||||
lstm_dropout=0.3),
|
|
||||||
|
|
||||||
LSTMSeq2Seq(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=64,
|
|
||||||
lstm_layers=2,
|
|
||||||
lstm_dropout=0.25),
|
|
||||||
TransformerSeq2Seq(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=64,
|
|
||||||
nhead=8,
|
|
||||||
nlayers=4,
|
|
||||||
attention_dropout=0.3),
|
|
||||||
Transformer(input_size,
|
|
||||||
output_size,
|
|
||||||
attention_dropout=0.3,
|
|
||||||
nhead=8,
|
|
||||||
nlayers=6,
|
|
||||||
hidden_size=64),
|
|
||||||
TransformerSeq(input_size,
|
|
||||||
output_size),
|
|
||||||
LSTMSeq(input_size,
|
|
||||||
output_size),
|
|
||||||
|
|
||||||
]
|
|
||||||
# -
|
|
||||||
|
|
||||||
# Baseline model
|
|
||||||
pt_model = BaselineLast()
|
|
||||||
model = PL_MODEL(pt_model).to(device)
|
|
||||||
trainer = pl.Trainer(gpus=1,
|
|
||||||
max_epochs=1,
|
|
||||||
limit_train_batches=0.01,
|
|
||||||
logger=CSVLogger("logs",
|
|
||||||
name=type(pt_model).__name__),
|
|
||||||
)
|
|
||||||
trainer.fit(model, dl_train, dl_test)
|
|
||||||
print(plot_hist(trainer))
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*8,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
print(f'baseline nll: {ds_preds.nll.mean().item():2.2g}')
|
|
||||||
|
|
||||||
for pt_model in models:
|
|
||||||
name = type(pt_model).__name__
|
|
||||||
print(name)
|
|
||||||
|
|
||||||
# Wrap in lightning
|
|
||||||
patience = 2
|
|
||||||
model = PL_MODEL(pt_model, patience=patience, lr=3e-4).to(device)
|
|
||||||
|
|
||||||
# Trainer
|
|
||||||
trainer = pl.Trainer(gpus=1,
|
|
||||||
min_epochs=2,
|
|
||||||
max_epochs=10,
|
|
||||||
amp_level='O1',
|
|
||||||
precision=16,
|
|
||||||
gradient_clip_val=1,
|
|
||||||
logger=CSVLogger("logs",
|
|
||||||
name=type(pt_model).__name__),
|
|
||||||
callbacks=[
|
|
||||||
EarlyStopping(monitor='loss/val', patience=patience*2),
|
|
||||||
# PrintTableMetricsCallback2()
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Train
|
|
||||||
trainer.fit(model, dl_train, dl_test)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*8,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
|
|
||||||
print(name)
|
|
||||||
print(f'mean_NLL {ds_predss.nll.mean().item():2.2f}')
|
|
||||||
|
|
||||||
# Performance
|
|
||||||
ds_preds = ds_predss.isel(block=0)
|
|
||||||
print(plot_hist(trainer))
|
|
||||||
plot_performance(ds_preds)
|
|
||||||
|
|
||||||
# +
|
|
||||||
# ds_preds = predict(model.to(device),
|
|
||||||
# ds_test.datasets[0],
|
|
||||||
# batch_size*8,
|
|
||||||
# device=device,
|
|
||||||
# scaler=output_scaler)
|
|
||||||
# -
|
|
||||||
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*8,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
|
|
||||||
ds_pred_block = ds_predss.isel(block=1)
|
|
||||||
|
|
||||||
# # holoviews pred
|
|
||||||
|
|
||||||
import holoviews as hv
|
|
||||||
from holoviews import opts
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
def plot_prediction_now(t_source):
|
|
||||||
"""Plot predictions with holoviews"""
|
|
||||||
|
|
||||||
# Let us pass in an int
|
|
||||||
if isinstance(t_source, int):
|
|
||||||
t_source = ds_pred_block.t_source[t_source].to_pandas()
|
|
||||||
|
|
||||||
d = ds_pred_block.sel(t_source=t_source)
|
|
||||||
|
|
||||||
# Sometimes there are duplicate times, take the first
|
|
||||||
if len(d.t_source.shape) and d.t_source.shape[0] > 0:
|
|
||||||
d = d.isel(t_source=0)
|
|
||||||
if len(d.t_source.shape) and d.t_source.shape[0] == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
now = d.t_source.to_pandas()
|
|
||||||
|
|
||||||
# Plot true
|
|
||||||
x = np.concatenate([d.t_past, d.t_target])
|
|
||||||
yt = np.concatenate([d.y_past, d.y_true])
|
|
||||||
p = hv.Scatter({
|
|
||||||
'x': x,
|
|
||||||
'y': yt
|
|
||||||
}, label='true').opts(color='black')
|
|
||||||
|
|
||||||
# Get arrays
|
|
||||||
xf = d.t_target.values
|
|
||||||
yp = d.y_pred
|
|
||||||
s = d.y_pred_std
|
|
||||||
p *= hv.Curve({
|
|
||||||
'x': xf,
|
|
||||||
'y': yp
|
|
||||||
}, label='pred').opts(color='blue')
|
|
||||||
p *= hv.Area((xf, yp - 2 * s, yp + 2 * s),
|
|
||||||
vdims=['y', 'y2'],
|
|
||||||
label='2*std').opts(alpha=0.5, line_width=0)
|
|
||||||
|
|
||||||
# plot now line
|
|
||||||
p *= hv.VLine(now, label='now').opts(color='red', framewise=True)
|
|
||||||
return p.opts(title=f'Prediction at {now}. NLL={d.nll.mean().item():2.2f}')
|
|
||||||
|
|
||||||
|
|
||||||
dmap_pred = (hv.DynamicMap(plot_prediction_now, kdims=['t_source'])
|
|
||||||
.redim.values(t_source=ds_pred_block.t_source.to_pandas())
|
|
||||||
.opts(width=800,
|
|
||||||
height=300,
|
|
||||||
))
|
|
||||||
dmap_pred
|
|
||||||
# -
|
|
||||||
|
|
||||||
d = ds_preds.mean(['t_source', 'block'])['nll'].groupby('t_ahead_hours').mean()
|
|
||||||
nll_vs_tahead = hv.Curve((d.t_ahead_hours, d)).redim(x='hours ahead', y='nll').opts(width=800)
|
|
||||||
nll_vs_tahead
|
|
||||||
|
|
||||||
# +
|
|
||||||
# def plot_predictions_vs_time(it_ahead):
|
|
||||||
# """Plot predictions vs time with holoviews"""
|
|
||||||
|
|
||||||
# d = ds_pred_block.isel(t_ahead=it_ahead).groupby('t_source').first()
|
|
||||||
# # print(d)
|
|
||||||
|
|
||||||
# p = hv.Scatter({
|
|
||||||
# 'x': d.t_source,
|
|
||||||
# 'y': d.y_true
|
|
||||||
# }, label='true').opts(color='black')
|
|
||||||
|
|
||||||
# # Get arrays
|
|
||||||
# xf = d.t_source.values
|
|
||||||
# yp = d.y_pred
|
|
||||||
# s = d.y_pred_std
|
|
||||||
# p *= hv.Curve({
|
|
||||||
# 'x': xf,
|
|
||||||
# 'y': yp
|
|
||||||
# }, label='pred').opts(color='blue')
|
|
||||||
# p *= hv.Area((xf, yp - 2 * s, yp + 2 * s),
|
|
||||||
# vdims=['y', 'y2'],
|
|
||||||
# label='2*std').opts(alpha=0.5, line_width=0)
|
|
||||||
|
|
||||||
|
|
||||||
# return p.opts(title=f'Prediction at {it_ahead * pd.Timedelta(freq)} ahead. NLL={d.nll.mean().item():2.2f}')
|
|
||||||
|
|
||||||
|
|
||||||
# dmap_preds = (hv.DynamicMap(plot_predictions_vs_time, kdims=['it_ahead'])
|
|
||||||
# .redim.values(it_ahead=range(ds_pred_block.t_ahead.shape[0]))
|
|
||||||
# .opts(width=800,
|
|
||||||
# height=300,
|
|
||||||
# ))
|
|
||||||
# dmap_preds
|
|
||||||
# # TODO fixme
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
# d = ds_preds.mean(['t_ahead', 'block'])['nll'].groupby('t_source').mean()
|
|
||||||
# nll_vs_time = hv.Curve(d).opts(width=800)
|
|
||||||
# nll_vs_time
|
|
||||||
|
|
||||||
# +
|
|
||||||
# true_vs_pred = hv.Scatter((ds_preds.y_true, ds_preds.y_pred))
|
|
||||||
# dynspread(datashade(true_vs_pred))
|
|
||||||
# -
|
|
||||||
|
|
||||||
# # Summarize experiments
|
|
||||||
|
|
||||||
# # LR finder
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
# # Run learning rate finder
|
|
||||||
# lr_finder = trainer.tuner.lr_find(model)
|
|
||||||
|
|
||||||
# # Results can be found in
|
|
||||||
# lr_finder.results
|
|
||||||
|
|
||||||
# # Plot with
|
|
||||||
# fig = lr_finder.plot(suggest=True)
|
|
||||||
# fig.show()
|
|
||||||
|
|
||||||
# # Pick point based on plot, or get suggestion
|
|
||||||
# new_lr = lr_finder.suggestion()
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
File diff suppressed because one or more lines are too long
@@ -1,805 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# ---
|
|
||||||
# jupyter:
|
|
||||||
# jupytext:
|
|
||||||
# formats: ipynb,py:light
|
|
||||||
# text_representation:
|
|
||||||
# extension: .py
|
|
||||||
# format_name: light
|
|
||||||
# format_version: '1.5'
|
|
||||||
# jupytext_version: 1.6.0
|
|
||||||
# kernelspec:
|
|
||||||
# display_name: seq2seq-time
|
|
||||||
# language: python
|
|
||||||
# name: seq2seq-time
|
|
||||||
# ---
|
|
||||||
|
|
||||||
# # Sequence to Sequence Models for Timeseries Regression
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# In this notebook we are going to tackle a harder problem:
|
|
||||||
# - predicting the future on a timeseries
|
|
||||||
# - using an LSTM
|
|
||||||
# - with rough uncertainty (uncalibrated)
|
|
||||||
# - outputing sequence of predictions
|
|
||||||
#
|
|
||||||
# <img src="../reports/figures/Seq2Seq for regression.png" />
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# https://medium.com/@boitemailjeanmid/smart-meters-in-london-part1-description-and-first-insights-jean-michel-d-db97af2de71b
|
|
||||||
#
|
|
||||||
|
|
||||||
# OPTIONAL: Load the "autoreload" extension so that code can change. But blacklist large modules
|
|
||||||
# %load_ext autoreload
|
|
||||||
# %autoreload 2
|
|
||||||
# %aimport -pandas
|
|
||||||
# %aimport -torch
|
|
||||||
# %aimport -numpy
|
|
||||||
# %aimport -matplotlib
|
|
||||||
# %aimport -dask
|
|
||||||
# %aimport -tqdm
|
|
||||||
# %matplotlib inline
|
|
||||||
|
|
||||||
# +
|
|
||||||
# Imports
|
|
||||||
import torch
|
|
||||||
from torch import nn, optim
|
|
||||||
from torch.nn import functional as F
|
|
||||||
from torch.autograd import Variable
|
|
||||||
import torch
|
|
||||||
import torch.utils.data
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
plt.rcParams['figure.figsize'] = (12.0, 3.0)
|
|
||||||
plt.style.use('ggplot')
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from tqdm.auto import tqdm
|
|
||||||
|
|
||||||
import pytorch_lightning as pl
|
|
||||||
# -
|
|
||||||
|
|
||||||
import warnings
|
|
||||||
warnings.simplefilter('once')
|
|
||||||
|
|
||||||
from seq2seq_time.data.dataset import Seq2SeqDataSet, Seq2SeqDataSets
|
|
||||||
from seq2seq_time.predict import predict, predict_multi
|
|
||||||
|
|
||||||
import logging, sys
|
|
||||||
# logging.basicConfig(stream=sys.stdout, level=logging.INFO)
|
|
||||||
|
|
||||||
# ## Parameters
|
|
||||||
|
|
||||||
# +
|
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
||||||
print(f'using {device}')
|
|
||||||
|
|
||||||
columns_target=['energy(kWh/hh)']
|
|
||||||
window_past = 48*2
|
|
||||||
window_future = 48*2
|
|
||||||
batch_size = 128
|
|
||||||
num_workers = 5
|
|
||||||
freq = '30T'
|
|
||||||
max_rows = 5e5
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
# ## Load data
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
def get_smartmeter_df(indir=Path('../data/raw/smart-meters-in-london'), max_files=8):
|
|
||||||
"""
|
|
||||||
Data loading and cleanding is always messy, so understand this code is optional.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Load csv files
|
|
||||||
csv_files = sorted((indir/'halfhourly_dataset').glob('*.csv'))[:max_files]
|
|
||||||
|
|
||||||
dfs = []
|
|
||||||
for f in csv_files:
|
|
||||||
df = (pd.read_csv(f, parse_dates=[1], na_values=['Null'])
|
|
||||||
.groupby('tstp')
|
|
||||||
.sum()
|
|
||||||
.sort_index()
|
|
||||||
)
|
|
||||||
df['block'] = f.stem
|
|
||||||
|
|
||||||
# Drop nan and 0's
|
|
||||||
df = df[df['energy(kWh/hh)']!=0]
|
|
||||||
df = df.dropna()
|
|
||||||
|
|
||||||
# Add time features
|
|
||||||
time = df.index.to_series()
|
|
||||||
df["month"] = time.dt.month
|
|
||||||
df['day'] = time.dt.day
|
|
||||||
df['week'] = time.dt.week
|
|
||||||
df['hour'] = time.dt.hour
|
|
||||||
df['minute'] = time.dt.minute
|
|
||||||
df['dayofweek'] = time.dt.dayofweek
|
|
||||||
|
|
||||||
# Load weather data
|
|
||||||
df_weather = pd.read_csv(indir/'weather_hourly_darksky.csv', parse_dates=[3])
|
|
||||||
use_cols = ['visibility', 'windBearing', 'temperature', 'time', 'dewPoint',
|
|
||||||
'pressure', 'apparentTemperature', 'windSpeed',
|
|
||||||
'humidity']
|
|
||||||
df_weather = df_weather[use_cols].set_index('time')
|
|
||||||
|
|
||||||
# Resample to match energy data
|
|
||||||
# Use first, since we have bearing, and you can't take mean
|
|
||||||
df_weather = df_weather.resample(freq).first().ffill()
|
|
||||||
|
|
||||||
# Join weather and energy data
|
|
||||||
df = pd.merge(df, df_weather, how='inner', left_index=True, right_index=True, sort=True)
|
|
||||||
|
|
||||||
# Holidays
|
|
||||||
df_hols = pd.read_csv(indir/'uk_bank_holidays.csv', parse_dates=[0])
|
|
||||||
holidays = set(df_hols['Bank holidays'].dt.round('D'))
|
|
||||||
def is_holiday(dt):
|
|
||||||
return dt in holidays
|
|
||||||
days = df.index.floor('D')
|
|
||||||
holiday_mapping = days.unique().to_series().apply(is_holiday).astype(int).to_dict()
|
|
||||||
df['holiday'] = days.to_series().map(holiday_mapping).values
|
|
||||||
|
|
||||||
# sort
|
|
||||||
df.index.name = 'Date'
|
|
||||||
df = df.loc['2012-09':] # Weird value before this
|
|
||||||
|
|
||||||
dfs.append(df)
|
|
||||||
|
|
||||||
return pd.concat(dfs)
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
# Our dataset is the london smartmeter data. But at half hour intervals
|
|
||||||
|
|
||||||
# +
|
|
||||||
df = get_smartmeter_df(max_files=12)
|
|
||||||
|
|
||||||
# # Just get the first one for now
|
|
||||||
# dfs = list(dfs)
|
|
||||||
|
|
||||||
# # df = df.resample(freq).first().dropna() # Where empty we will backfill, this will respect causality, and mostly maintain the mean
|
|
||||||
|
|
||||||
df = df.tail(int(max_rows)).copy() # Just use last X rows
|
|
||||||
# df = pd.concat(dfs[:6], 0)
|
|
||||||
# # df = dfs[0]
|
|
||||||
print(df.block.value_counts())
|
|
||||||
df
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ### Plot/explore
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
import holoviews as hv
|
|
||||||
from holoviews import opts
|
|
||||||
|
|
||||||
from holoviews.plotting.links import RangeToolLink
|
|
||||||
|
|
||||||
import datashader as ds
|
|
||||||
|
|
||||||
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize
|
|
||||||
from holoviews.operation import decimate
|
|
||||||
|
|
||||||
hv.extension('bokeh')
|
|
||||||
|
|
||||||
|
|
||||||
# def house_curve(Name=None):
|
|
||||||
# if isinstance(Name, int):
|
|
||||||
# name = df.block.unique()[Name]
|
|
||||||
# d = df[df.block == Name]
|
|
||||||
# d_curve = hv.Curve(d, 'Date', 'energy(kWh/hh)', label=Name).opts(framewise=True)
|
|
||||||
# return d_curve
|
|
||||||
|
|
||||||
|
|
||||||
# dmap = hv.DynamicMap(house_curve, kdims=['Name'])
|
|
||||||
# dmap = dmap.redim.values(Name=list(df.block.unique()))
|
|
||||||
# dynspread(datashade(dmap).opts(width=800,
|
|
||||||
# height=300,
|
|
||||||
# tools=['xwheel_zoom', 'pan'],
|
|
||||||
# active_tools=['xwheel_zoom', 'pan'],
|
|
||||||
# default_tools=['reset', 'save', 'hover']
|
|
||||||
# ))
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ### Profiling
|
|
||||||
|
|
||||||
# +
|
|
||||||
# from pandas_profiling import ProfileReport
|
|
||||||
# profile = ProfileReport(df, title="Pandas Profiling Report", minimal=True)
|
|
||||||
# profile
|
|
||||||
# -
|
|
||||||
|
|
||||||
# ### Norm
|
|
||||||
|
|
||||||
df.describe()
|
|
||||||
|
|
||||||
# +
|
|
||||||
import sklearn
|
|
||||||
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
|
|
||||||
from sklearn_pandas import DataFrameMapper
|
|
||||||
|
|
||||||
columns_input_numeric = list(df.drop(columns=columns_target)._get_numeric_data().columns)
|
|
||||||
columns_categorical = list(set(df.columns)-set(columns_input_numeric)-set(columns_target))
|
|
||||||
|
|
||||||
output_scalers = [([n], StandardScaler()) for n in columns_target]
|
|
||||||
transformers=output_scalers + \
|
|
||||||
[([n], StandardScaler()) for n in columns_input_numeric] + \
|
|
||||||
[([n], OrdinalEncoder()) for n in columns_categorical]
|
|
||||||
scaler = DataFrameMapper(transformers, df_out=True)
|
|
||||||
df_norm = scaler.fit_transform(df)
|
|
||||||
df_norm
|
|
||||||
# -
|
|
||||||
|
|
||||||
output_scaler = next(filter(lambda r:r[0][0] in columns_target, scaler.features))[-1]
|
|
||||||
output_scaler
|
|
||||||
|
|
||||||
# ### Split
|
|
||||||
|
|
||||||
# +
|
|
||||||
# split data, with the test in the future
|
|
||||||
|
|
||||||
d0 =df_norm.index.min()
|
|
||||||
d1 = df_norm.index.max()
|
|
||||||
split_time = d0+(d1-d0)*0.8
|
|
||||||
split_time = split_time.round('1D')
|
|
||||||
print(split_time)
|
|
||||||
df_train = df_norm.groupby('block').apply(lambda d:d.loc[:split_time]).reset_index(level=0, drop=True)
|
|
||||||
df_test = df_norm.groupby('block').apply(lambda d:d.loc[split_time:]).reset_index(level=0, drop=True)
|
|
||||||
# df_test
|
|
||||||
|
|
||||||
# +
|
|
||||||
# # Show split
|
|
||||||
# df_train['energy(kWh/hh)'].plot(label='train')
|
|
||||||
# df_test['energy(kWh/hh)'].plot(label='test')
|
|
||||||
# plt.ylabel('energy(kWh/hh)')
|
|
||||||
# plt.legend()
|
|
||||||
# -
|
|
||||||
|
|
||||||
# # Show split
|
|
||||||
scatter = dynspread(datashade(hv.Curve(df_train, kdims=['Date'], vdims=['energy(kWh/hh)', 'block']).groupby('block'), cmap='blue'))
|
|
||||||
scatter *= dynspread(datashade(hv.Curve(df_test, kdims=['Date'], vdims=['energy(kWh/hh)', 'block']).groupby('block'), cmap='red'))
|
|
||||||
scatter = scatter.opts(plot=dict(width=800))
|
|
||||||
scatter
|
|
||||||
|
|
||||||
# ### Dataset
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
# ### Dataset
|
|
||||||
# These are the columns that we wont know in the future
|
|
||||||
# We need to blank them out in x_future
|
|
||||||
columns_blank=['visibility',
|
|
||||||
'windBearing', 'temperature', 'dewPoint', 'pressure',
|
|
||||||
'apparentTemperature', 'windSpeed', 'humidity']
|
|
||||||
df_trains = [d.resample(freq).first().ffill().dropna() for _,d in df_train.groupby('block')]
|
|
||||||
df_tests = [d.resample(freq).first().ffill().dropna() for _,d in df_test.groupby('block')]
|
|
||||||
ds_train = Seq2SeqDataSets(df_trains,
|
|
||||||
window_past=window_past,
|
|
||||||
window_future=window_future,
|
|
||||||
columns_blank=columns_blank)
|
|
||||||
ds_test = Seq2SeqDataSets(df_tests,
|
|
||||||
window_past=window_past,
|
|
||||||
window_future=window_future,
|
|
||||||
columns_blank=columns_blank)
|
|
||||||
print(ds_train)
|
|
||||||
print(ds_test)
|
|
||||||
# -
|
|
||||||
# we can treat it like an array
|
|
||||||
ds_train[0]
|
|
||||||
len(ds_train)
|
|
||||||
ds_train[-1]
|
|
||||||
|
|
||||||
# +
|
|
||||||
# We can get rows
|
|
||||||
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
|
|
||||||
|
|
||||||
# Plot one instance, this is what the model sees
|
|
||||||
y_past['energy(kWh/hh)'].plot(label='past')
|
|
||||||
y_future['energy(kWh/hh)'].plot(ax=plt.gca(), label='future')
|
|
||||||
plt.legend()
|
|
||||||
plt.ylabel('energy(kWh/hh)')
|
|
||||||
|
|
||||||
# Notice we've added on two new columns tsp (time since present) and is_past
|
|
||||||
x_past.tail()
|
|
||||||
# -
|
|
||||||
|
|
||||||
# Notice we've hidden some future columns to prevent cheating
|
|
||||||
x_future.tail()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ## Plot helpers
|
|
||||||
|
|
||||||
# +
|
|
||||||
def plot_prediction(ds_preds, i):
|
|
||||||
"""Plot a prediction into the future, at a single point in time."""
|
|
||||||
d = ds_preds.isel(t_source=i)
|
|
||||||
|
|
||||||
# Get arrays
|
|
||||||
xf = d.t_target
|
|
||||||
yp = d.y_pred
|
|
||||||
s = d.y_pred_std
|
|
||||||
yt = d.y_true
|
|
||||||
now = d.t_source.squeeze()
|
|
||||||
|
|
||||||
|
|
||||||
plt.figure(figsize=(12, 4))
|
|
||||||
|
|
||||||
plt.scatter(xf, yt, label='true', c='k', s=6)
|
|
||||||
ylim = plt.ylim()
|
|
||||||
|
|
||||||
# plot prediction
|
|
||||||
plt.fill_between(xf, yp-2*s, yp+2*s, alpha=0.25,
|
|
||||||
facecolor="b",
|
|
||||||
interpolate=True,
|
|
||||||
label="2 std",)
|
|
||||||
plt.plot(xf, yp, label='pred', c='b')
|
|
||||||
|
|
||||||
# plot true
|
|
||||||
plt.scatter(
|
|
||||||
d.t_past,
|
|
||||||
d.y_past,
|
|
||||||
c='k',
|
|
||||||
s=6
|
|
||||||
)
|
|
||||||
|
|
||||||
# plot a red line for now
|
|
||||||
plt.vlines(x=now, ymin=0, ymax=1, label='now', color='r')
|
|
||||||
plt.ylim(*ylim)
|
|
||||||
|
|
||||||
now=pd.Timestamp(now.values)
|
|
||||||
plt.title(f'Prediction NLL={d.nll.mean().item():2.2g}')
|
|
||||||
plt.xlabel(f'{now.date()}')
|
|
||||||
plt.ylabel('energy(kWh/hh)')
|
|
||||||
plt.legend()
|
|
||||||
plt.xticks(rotation=45)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
def plot_performance(ds_preds, full=False):
|
|
||||||
"""Multiple plots using xr_preds"""
|
|
||||||
plot_prediction(ds_preds, 24)
|
|
||||||
|
|
||||||
ds_preds.mean('t_source').plot.scatter('t_ahead_hours', 'nll') # Mean over all predictions
|
|
||||||
n = len(ds_preds.t_source)
|
|
||||||
plt.ylabel('Negative Log Likelihood (lower is better)')
|
|
||||||
plt.xlabel('Hours ahead')
|
|
||||||
plt.title(f'NLL vs time ahead (no. samples={n})')
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# Make a plot of the NLL over time. Does this solution get worse with time?
|
|
||||||
if full:
|
|
||||||
d = ds_preds.mean('t_ahead').groupby('t_source').mean().plot.scatter('t_source', 'nll')
|
|
||||||
plt.xticks(rotation=45)
|
|
||||||
plt.title('NLL over source time (lower is better)')
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# A scatter plot is easy with xarray
|
|
||||||
if full:
|
|
||||||
plt.figure(figsize=(5, 5))
|
|
||||||
ds_preds.plot.scatter('y_true', 'y_pred', s=.01)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def plot_hist(trainer):
|
|
||||||
try:
|
|
||||||
df_hist = pd.read_csv(trainer.logger.experiment.metrics_file_path)
|
|
||||||
df_hist['epoch'] = df_hist['epoch'].ffill()
|
|
||||||
df_histe = df_hist.set_index('epoch').groupby('epoch').mean()
|
|
||||||
if len(df_histe)>1:
|
|
||||||
df_histe[['loss/train', 'loss/val']].plot(title='history')
|
|
||||||
return df_histe
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# ## Lightning
|
|
||||||
|
|
||||||
# +
|
|
||||||
import pytorch_lightning as pl
|
|
||||||
|
|
||||||
class PL_MODEL(pl.LightningModule):
|
|
||||||
def __init__(self, model, lr=3e-4, patience=2, weight_decay=0):
|
|
||||||
super().__init__()
|
|
||||||
self._model = model
|
|
||||||
self.lr = lr
|
|
||||||
self.patience = patience
|
|
||||||
self.weight_decay = weight_decay
|
|
||||||
|
|
||||||
def forward(self, x_past, y_past, x_future, y_future=None):
|
|
||||||
"""Eval/Predict"""
|
|
||||||
y_dist, extra = self._model(x_past, y_past, x_future, y_future)
|
|
||||||
return y_dist, extra
|
|
||||||
|
|
||||||
def training_step(self, batch, batch_idx, phase='train'):
|
|
||||||
x_past, y_past, x_future, y_future = batch
|
|
||||||
y_dist, extra = self.forward(*batch)
|
|
||||||
loss = -y_dist.log_prob(y_future).mean()
|
|
||||||
self.log_dict({f'loss/{phase}':loss})
|
|
||||||
if ('loss' in extra) and (phase=='train'):
|
|
||||||
# some models have a special loss
|
|
||||||
loss = extra['loss']
|
|
||||||
self.log_dict({f'model_loss/{phase}':loss})
|
|
||||||
return loss
|
|
||||||
|
|
||||||
def validation_step(self, batch, batch_idx):
|
|
||||||
return self.training_step(batch, batch_idx, phase='val')
|
|
||||||
|
|
||||||
def configure_optimizers(self):
|
|
||||||
optim = torch.optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
|
|
||||||
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
|
|
||||||
optim,
|
|
||||||
patience=self.patience,
|
|
||||||
verbose=True,
|
|
||||||
min_lr=1e-7,
|
|
||||||
)
|
|
||||||
return {'optimizer': optim, 'lr_scheduler': scheduler, 'monitor': 'loss/val'}
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
# # Run
|
|
||||||
from torch.utils.data import DataLoader
|
|
||||||
from pytorch_lightning.loggers import CSVLogger
|
|
||||||
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
# Init data
|
|
||||||
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
|
|
||||||
input_size = x_past.shape[-1]
|
|
||||||
output_size = y_future.shape[-1]
|
|
||||||
|
|
||||||
dl_train = DataLoader(ds_train,
|
|
||||||
batch_size=batch_size,
|
|
||||||
shuffle=True,
|
|
||||||
pin_memory=num_workers==0,
|
|
||||||
num_workers=num_workers)
|
|
||||||
dl_test = DataLoader(ds_test, batch_size=batch_size, num_workers=num_workers)
|
|
||||||
|
|
||||||
# +
|
|
||||||
import gc
|
|
||||||
|
|
||||||
def free_mem():
|
|
||||||
gc.collect()
|
|
||||||
torch.cuda.empty_cache()
|
|
||||||
gc.collect()
|
|
||||||
|
|
||||||
|
|
||||||
# -
|
|
||||||
|
|
||||||
from seq2seq_time.models.lstm_seq2seq import LSTMSeq2Seq
|
|
||||||
from seq2seq_time.models.lstm_seq import LSTMSeq
|
|
||||||
from seq2seq_time.models.lstm import LSTM
|
|
||||||
from seq2seq_time.models.baseline import BaselineLast
|
|
||||||
from seq2seq_time.models.transformer import Transformer
|
|
||||||
from seq2seq_time.models.transformer_autor import TransformerAutoR
|
|
||||||
from seq2seq_time.models.transformer_seq2seq import TransformerSeq2Seq
|
|
||||||
from seq2seq_time.models.transformer_seq import TransformerSeq
|
|
||||||
from seq2seq_time.models.neural_process import RANP
|
|
||||||
from seq2seq_time.models.transformer_process import TransformerProcess
|
|
||||||
# ## Plots
|
|
||||||
# +
|
|
||||||
# PL_MODEL(TransformerAutoR(input_size, output_size, hidden_out_size=32),
|
|
||||||
# patience=patience,
|
|
||||||
# lr=2e-5,
|
|
||||||
# weight_decay=1e-3)
|
|
||||||
# -
|
|
||||||
|
|
||||||
models = [
|
|
||||||
# TransformerAutoR2(input_size,
|
|
||||||
# output_size),
|
|
||||||
lambda: TransformerAutoR(input_size,
|
|
||||||
output_size, hidden_out_size=32),
|
|
||||||
lambda: RANP(input_size,
|
|
||||||
output_size, hidden_dim=32,
|
|
||||||
latent_dim=64, n_decoder_layers=4),
|
|
||||||
lambda: LSTM(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=80,
|
|
||||||
lstm_layers=3,
|
|
||||||
lstm_dropout=0.3),
|
|
||||||
lambda: LSTMSeq2Seq(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=64,
|
|
||||||
lstm_layers=2,
|
|
||||||
lstm_dropout=0.25),
|
|
||||||
lambda: TransformerSeq2Seq(input_size,
|
|
||||||
output_size,
|
|
||||||
hidden_size=128,
|
|
||||||
nhead=8,
|
|
||||||
nlayers=4,
|
|
||||||
attention_dropout=0.2),
|
|
||||||
lambda: Transformer(input_size,
|
|
||||||
output_size,
|
|
||||||
attention_dropout=0.2,
|
|
||||||
nhead=8,
|
|
||||||
nlayers=8,
|
|
||||||
hidden_size=128),
|
|
||||||
# lambda: TransformerSeq(input_size,
|
|
||||||
# output_size),
|
|
||||||
# lambda: LSTMSeq(input_size,
|
|
||||||
# output_size),
|
|
||||||
lambda :TransformerProcess(input_size,
|
|
||||||
output_size, hidden_size=16,
|
|
||||||
latent_dim=8, dropout=0.5,
|
|
||||||
nlayers=4,)
|
|
||||||
]
|
|
||||||
models
|
|
||||||
|
|
||||||
# Baseline model
|
|
||||||
pt_model = BaselineLast()
|
|
||||||
model = PL_MODEL(pt_model).to(device)
|
|
||||||
trainer = pl.Trainer(gpus=1,
|
|
||||||
max_epochs=1,
|
|
||||||
limit_train_batches=0.01,
|
|
||||||
logger=CSVLogger("logs",
|
|
||||||
name=type(pt_model).__name__),
|
|
||||||
)
|
|
||||||
trainer.fit(model, dl_train, dl_test)
|
|
||||||
print(plot_hist(trainer))
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*8,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
print(f'baseline nll: {ds_predss.nll.mean().item():2.2g}')
|
|
||||||
|
|
||||||
# ## Train
|
|
||||||
|
|
||||||
for m_fn in models:
|
|
||||||
pt_model = m_fn()
|
|
||||||
name = type(pt_model).__name__
|
|
||||||
print(name)
|
|
||||||
|
|
||||||
# Wrap in lightning
|
|
||||||
patience = 2
|
|
||||||
model = PL_MODEL(pt_model, patience=patience, lr=2e-5, weight_decay=1e-3).to(device)
|
|
||||||
|
|
||||||
# Trainer
|
|
||||||
trainer = pl.Trainer(gpus=1,
|
|
||||||
min_epochs=2,
|
|
||||||
max_epochs=30,
|
|
||||||
amp_level='O1',
|
|
||||||
precision=16,
|
|
||||||
gradient_clip_val=1,
|
|
||||||
logger=CSVLogger("logs",
|
|
||||||
name=type(pt_model).__name__),
|
|
||||||
callbacks=[
|
|
||||||
EarlyStopping(monitor='loss/val', patience=patience*2),
|
|
||||||
# PrintTableMetricsCallback2()
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
# Train
|
|
||||||
trainer.fit(model, dl_train, dl_test)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*2,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
|
|
||||||
print(name)
|
|
||||||
print(f'mean_NLL {ds_predss.nll.mean().item():2.2f}')
|
|
||||||
|
|
||||||
# Performance
|
|
||||||
ds_preds = ds_predss.isel(block=0)
|
|
||||||
print(plot_hist(trainer))
|
|
||||||
plot_performance(ds_preds)
|
|
||||||
|
|
||||||
model.cpu()
|
|
||||||
free_mem()
|
|
||||||
|
|
||||||
# # Plots
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
# Get latest checkpoint for a model type...
|
|
||||||
pt_model = models[1]()
|
|
||||||
name = type(pt_model).__name__
|
|
||||||
|
|
||||||
checkpoints = (Path('logs')/name).glob('version_*')
|
|
||||||
sort_checkpoints = lambda f:int(f.stem.split('_')[-1])
|
|
||||||
checkpoints = sorted(checkpoints, key=sort_checkpoints)
|
|
||||||
latest_checkpoint = checkpoints[-1]
|
|
||||||
checkpoint_f = sorted(latest_checkpoint.glob('checkpoints/*.ckpt'))[-1]
|
|
||||||
print('pt model name', name)
|
|
||||||
print('latest_checkpoint', checkpoint_f)
|
|
||||||
# -
|
|
||||||
|
|
||||||
# Load
|
|
||||||
model = PL_MODEL(pt_model).to(device)
|
|
||||||
model.load_from_checkpoint(str(checkpoint_f), model=pt_model)
|
|
||||||
|
|
||||||
ds_predss = predict_multi(model.to(device),
|
|
||||||
ds_test.datasets,
|
|
||||||
batch_size*4,
|
|
||||||
device=device,
|
|
||||||
scaler=output_scaler)
|
|
||||||
ds_predss.nll.mean().item()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ds_pred_block = ds_predss.isel(block=1)
|
|
||||||
|
|
||||||
# # holoviews pred
|
|
||||||
|
|
||||||
# +
|
|
||||||
import holoviews as hv
|
|
||||||
from holoviews import opts
|
|
||||||
|
|
||||||
import holoviews as hv
|
|
||||||
from holoviews import opts
|
|
||||||
|
|
||||||
import datashader as ds
|
|
||||||
from holoviews.operation.datashader import datashade, shade, dynspread, rasterize
|
|
||||||
from holoviews.operation import decimate
|
|
||||||
|
|
||||||
hv.extension('bokeh')
|
|
||||||
|
|
||||||
# +
|
|
||||||
# A few diagnostic plots
|
|
||||||
d_source = ds_predss.mean(['t_ahead',
|
|
||||||
'block'])['nll'].groupby('t_source').mean()
|
|
||||||
nll_vs_time = (hv.Curve(d_source).opts(width=600,
|
|
||||||
height=200,
|
|
||||||
title='Error vs time of prediction'))
|
|
||||||
|
|
||||||
d_ahead = ds_predss.mean(['t_source',
|
|
||||||
'block'])['nll'].groupby('t_ahead_hours').mean()
|
|
||||||
nll_vs_tahead = (hv.Curve(
|
|
||||||
(d_ahead.t_ahead_hours,
|
|
||||||
d_ahead)).redim(x='hours ahead',
|
|
||||||
y='nll').opts(width=600,
|
|
||||||
height=200,
|
|
||||||
title='Error vs time ahead'))
|
|
||||||
|
|
||||||
true_vs_pred = datashade(hv.Scatter(
|
|
||||||
(ds_predss.y_true,
|
|
||||||
ds_predss.y_pred))).redim(x='true', y='pred').opts(title='Scatter plot')
|
|
||||||
true_vs_pred = dynspread(true_vs_pred)
|
|
||||||
|
|
||||||
l = nll_vs_time + nll_vs_tahead + true_vs_pred
|
|
||||||
l.cols(1).opts(
|
|
||||||
framewise=True,
|
|
||||||
shared_axes=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
def hv_predict_from_time(t_source):
|
|
||||||
"""Plot predictions with holoviews"""
|
|
||||||
|
|
||||||
# Let us pass in an int
|
|
||||||
if isinstance(t_source, int):
|
|
||||||
t_source = ds_pred_block.t_source[t_source].to_pandas()
|
|
||||||
|
|
||||||
d = ds_pred_block.sel(t_source=t_source)
|
|
||||||
|
|
||||||
# Sometimes there are duplicate times, take the first
|
|
||||||
if len(d.t_source.shape) and d.t_source.shape[0] > 0:
|
|
||||||
d = d.isel(t_source=0)
|
|
||||||
if len(d.t_source.shape) and d.t_source.shape[0] == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
now = d.t_source.to_pandas()
|
|
||||||
|
|
||||||
# Plot true
|
|
||||||
x = np.concatenate([d.t_past, d.t_target])
|
|
||||||
yt = np.concatenate([d.y_past, d.y_true])
|
|
||||||
p = hv.Scatter({
|
|
||||||
'x': x,
|
|
||||||
'y': yt
|
|
||||||
}, label='true').opts(color='black')
|
|
||||||
|
|
||||||
# Get arrays
|
|
||||||
xf = d.t_target.values
|
|
||||||
yp = d.y_pred
|
|
||||||
s = d.y_pred_std
|
|
||||||
p *= hv.Curve({
|
|
||||||
'x': xf,
|
|
||||||
'y': yp
|
|
||||||
}, label='pred').opts(color='blue')
|
|
||||||
p *= hv.Area((xf, yp - 2 * s, yp + 2 * s),
|
|
||||||
vdims=['y', 'y2'],
|
|
||||||
label='2*std').opts(alpha=0.5, line_width=0)
|
|
||||||
|
|
||||||
# plot now line
|
|
||||||
p *= hv.VLine(now, label='now').opts(color='red', framewise=True)
|
|
||||||
return p.opts(title=f'Prediction at {now}. NLL={d.nll.mean().item():2.2f}')
|
|
||||||
|
|
||||||
|
|
||||||
dmap_hv_predict_from_time = (hv.DynamicMap(hv_predict_from_time, kdims=['t_source'])
|
|
||||||
.redim.values(t_source=ds_pred_block.t_source.to_pandas())
|
|
||||||
.opts(width=800,
|
|
||||||
height=300,
|
|
||||||
))
|
|
||||||
dmap_hv_predict_from_time
|
|
||||||
|
|
||||||
|
|
||||||
# +
|
|
||||||
def hv_plot_predictions_vs_time(it_ahead=6,
|
|
||||||
std=False,
|
|
||||||
ds_pred_block=ds_pred_block):
|
|
||||||
"""Plot predictions vs time with holoviews"""
|
|
||||||
|
|
||||||
d = ds_pred_block.isel(t_ahead=it_ahead).groupby('t_source').first()
|
|
||||||
|
|
||||||
p = hv.Scatter({
|
|
||||||
'x': d.t_source,
|
|
||||||
'y': d.y_true
|
|
||||||
}, label='true').opts(color='black', size=2)
|
|
||||||
|
|
||||||
# Get arrays
|
|
||||||
xf = d.t_source.values
|
|
||||||
yp = d.y_pred
|
|
||||||
s = d.y_pred_std
|
|
||||||
|
|
||||||
# Mean
|
|
||||||
p *= hv.Curve({'x': xf, 'y': yp}, label='pred').opts(color='blue')
|
|
||||||
if std:
|
|
||||||
p *= hv.Spread((xf, yp, s*2),
|
|
||||||
label='2*std').opts(alpha=0.5, line_width=0)
|
|
||||||
else:
|
|
||||||
p = datashade(p)
|
|
||||||
|
|
||||||
title = f'Prediction at {it_ahead * pd.Timedelta(freq)} ahead. NLL={d.nll.mean().item():2.2f}'
|
|
||||||
return p.opts(
|
|
||||||
title=title,
|
|
||||||
width=800,
|
|
||||||
height=300,
|
|
||||||
tools=['xwheel_zoom'],
|
|
||||||
active_tools=['xwheel_zoom', 'pan'],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
p = hv_plot_predictions_vs_time(
|
|
||||||
6, std=True, ds_pred_block=ds_pred_block.isel(t_source=slice(100, 4000)))
|
|
||||||
p
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# # Summarize experiments
|
|
||||||
|
|
||||||
# # LR finder
|
|
||||||
|
|
||||||
# +
|
|
||||||
|
|
||||||
# # Run learning rate finder
|
|
||||||
# lr_finder = trainer.tuner.lr_find(model)
|
|
||||||
|
|
||||||
# # Results can be found in
|
|
||||||
# lr_finder.results
|
|
||||||
|
|
||||||
# # Plot with
|
|
||||||
# fig = lr_finder.plot(suggest=True)
|
|
||||||
# fig.show()
|
|
||||||
|
|
||||||
# # Pick point based on plot, or get suggestion
|
|
||||||
# new_lr = lr_finder.suggestion()
|
|
||||||
# -
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -28,6 +28,10 @@ class RegressionForecastData:
|
|||||||
|
|
||||||
# Check processing
|
# Check processing
|
||||||
self.check()
|
self.check()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def columns_past(self):
|
||||||
|
return set(self.df.columns)-set(self.columns_forecast)-set(self.columns_target)
|
||||||
|
|
||||||
def download(self) -> pd.DataFrame:
|
def download(self) -> pd.DataFrame:
|
||||||
"""Implement this method to download data and return raw df"""
|
"""Implement this method to download data and return raw df"""
|
||||||
@@ -54,8 +58,8 @@ class RegressionForecastData:
|
|||||||
|
|
||||||
def to_datasets(self, window_past: int, window_future: int, valid:bool=False) -> Tuple[Seq2SeqDataSet, Seq2SeqDataSet]:
|
def to_datasets(self, window_past: int, window_future: int, valid:bool=False) -> Tuple[Seq2SeqDataSet, Seq2SeqDataSet]:
|
||||||
"""Convert to torch datasets"""
|
"""Convert to torch datasets"""
|
||||||
ds_train = Seq2SeqDataSet(df_train, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=self.columns_past)
|
ds_train = Seq2SeqDataSet(self.df_train, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=self.columns_past)
|
||||||
ds_test = Seq2SeqDataSet(df_test, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=self.columns_past)
|
ds_test = Seq2SeqDataSet(self.df_test, window_past=window_past, window_future=window_future, columns_target=self.columns_target, columns_past=self.columns_past)
|
||||||
return ds_train, ds_test
|
return ds_train, ds_test
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
|
|||||||
assert df.index.freq is not None, 'should have freq'
|
assert df.index.freq is not None, 'should have freq'
|
||||||
assert_no_objects(df)
|
assert_no_objects(df)
|
||||||
|
|
||||||
self.df = df
|
self.df = df.dropna(subset=columns_target)
|
||||||
|
|
||||||
self.window_past = window_past
|
self.window_past = window_past
|
||||||
self.window_future = window_future
|
self.window_future = window_future
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import uptide
|
import uptide
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis
|
# https://en.wikipedia.org/wiki/Theory_of_tides#Harmonic_analysis
|
||||||
default_tidal_constituents = [
|
default_tidal_constituents = [
|
||||||
|
|||||||
Reference in New Issue
Block a user