This commit is contained in:
wassname
2020-10-27 06:43:50 +08:00
parent 6eda47b76f
commit 052fd6596c
9 changed files with 2920 additions and 21 deletions
+14 -18
View File
@@ -7,9 +7,11 @@ from sklearn_pandas import DataFrameMapper
import xarray as xr
import pandas as pd
import numpy as np
import zipfile
from .dataset import Seq2SeqDataSet
from .util import normalize_encode_dataframe, timeseries_split
from ..util import dset_to_nc
from .tidal import generate_tidal_periods
@@ -77,20 +79,18 @@ class GasSensor(RegressionForecastData):
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/00487/gas-sensor-array-temperature-modulation.zip'
# download if needed
extract_path = self.datasets_root/'GasSensor'
files = sorted(extract_path.glob('*.csv'))
if len(files)<13:
print('download_and_extract_archive')
download_and_extract_archive(url, self.datasets_root, extract_path)
# extract_path = self.datasets_root/'gas-sensor-array-temperature-modulation.zip'
download_url(url, self.datasets_root)
# Load csv's
files = sorted(extract_path.glob('*.csv'))
dfs = []
for f in files:
now = pd.to_datetime(f.stem, format='%Y%m%d_%H%M%S')
df = pd.read_csv(f)
df.index = pd.to_timedelta(df['Time (s)'], unit='s') + now
dfs.append(df)
# Load csv's from inside zip
zf = zipfile.ZipFile(self.datasets_root / 'gas-sensor-array-temperature-modulation.zip')
dfs=[]
for f in zf.namelist():
if f.endswith('.csv'):
now = pd.to_datetime(Pdset_to_ncath(f).stem, format='%Y%m%d_%H%M%S')
df = pd.read_csv(zf.open(f))
df.index = pd.to_timedelta(df['Time (s)'], unit='s') + now
dfs.append(df)
self.df = pd.concat(dfs).dropna(subset=self.columns_target)
df = df[[ 'CO (ppm)', 'Humidity (%r.h.)', 'Temperature (C)',
@@ -272,11 +272,7 @@ def get_current_timeseries(
# Add tidal freqs
xd = xd.merge(df_eta)
# Cache to nc
xd.to_netcdf(outfile)
print(
f'wrote "{outfile}" with size {outfile.stat().st_size*1e-6:2.2f} MB'
)
dset_to_nc(xd, outfile)
return outfile
+2 -2
View File
@@ -31,7 +31,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
assert df.index.freq is not None, 'should have freq'
assert_no_objects(df)
self.df = df.dropna(subset=columns_target)
self.df = df.dropna(subset=columns_target).ffill()
self.window_past = window_past
self.window_future = window_future
@@ -100,7 +100,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
def __repr__(self):
    """Return a short summary: class name, frame shape and covered time span.

    The span runs from the first to the last entry of ``self.df.index``.
    The index frequency is not reported, so building the repr never
    depends on ``index.freq`` being set.
    """
    t = self.df.index
    # An empty index has no first/last entry; a repr should not raise.
    span = f'{t[0]} to {t[-1]}' if len(t) else 'empty'
    return f'<{type(self).__name__}(shape={self.df.shape}, times={span})>'
class Seq2SeqDataSets(torch.utils.data.Dataset):
+1 -1
View File
@@ -16,4 +16,4 @@ def normalize_encode_dataframe(df, encoder=OrdinalEncoder):
def timeseries_split(df, test_fraction=0.2):
    """Split timeseries data with test in the future.

    Rows are assumed to be in chronological order. The last
    ``int(len(df) * test_fraction)`` rows become the test set and everything
    before them the train set.

    Args:
        df: object supporting ``len`` and ``.iloc`` slicing (e.g. a DataFrame).
        test_fraction: fraction of rows, counted from the end, held out.

    Returns:
        Tuple ``(train, test)``.
    """
    n_test = int(len(df) * test_fraction)
    # Use a positive split index: slicing with ``-n_test`` would flip the
    # result when n_test == 0 (``iloc[:-0]`` is empty, ``iloc[-0:]`` is all).
    split = len(df) - n_test
    return df.iloc[:split], df.iloc[split:]
+23
View File
@@ -0,0 +1,23 @@
import numpy as np
# Tiny constant added to the sMAPE denominator so it is never exactly zero.
EPSILON = 1e-10
def _error(actual: np.ndarray, predicted: np.ndarray):
    """Return the element-wise residual ``actual - predicted``."""
    residual = actual - predicted
    return residual
def mse(actual: np.ndarray, predicted: np.ndarray):
    """Mean Squared Error: the mean of the squared residuals ``actual - predicted``."""
    squared_residuals = np.square(actual - predicted)
    return np.mean(squared_residuals)
def rmse(actual: np.ndarray, predicted: np.ndarray):
    """Root Mean Squared Error: the square root of ``mse(actual, predicted)``."""
    mean_squared = mse(actual, predicted)
    return np.sqrt(mean_squared)
def smape(actual: np.ndarray, predicted: np.ndarray):
    """
    Symmetric Mean Absolute Percentage Error.

    Each absolute error is doubled and divided by the sum of the absolute
    actual and predicted values (plus EPSILON), then averaged.
    Note: result is NOT multiplied by 100
    """
    abs_error = np.abs(actual - predicted)
    # EPSILON keeps the denominator non-zero when both values are zero.
    scale = (np.abs(actual) + np.abs(predicted)) + EPSILON
    return np.mean(2.0 * abs_error / scale)
+146
View File
@@ -0,0 +1,146 @@
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
class Chomp1d(nn.Module):
    """Drop the last ``chomp_size`` entries along the final axis of a 3-axis tensor.

    Args:
        chomp_size: number of trailing positions to remove. ``0`` leaves the
            input unchanged.
    """

    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` positions (contiguous)."""
        # ``x[..., :-0]`` would be an empty slice, so zero must be special-cased.
        if self.chomp_size == 0:
            return x.contiguous()
        return x[:, :, : -self.chomp_size].contiguous()
class Conv(nn.Module):
    """1-D convolution that can trim its padded tail to stay causal.

    Wraps ``nn.Conv1d`` with the given ``padding``. When ``causal`` is true
    the output is passed through ``Chomp1d(padding)``; otherwise the raw
    convolution output is returned.
    """

    def __init__(
        self,
        n_inputs,
        n_outputs,
        kernel_size,
        stride,
        dilation,
        padding,
        causal=True,
    ):
        super().__init__()
        self.conv = nn.Conv1d(
            in_channels=n_inputs,
            out_channels=n_outputs,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )
        # Always built, but only applied in forward() when ``causal`` is set.
        self.chomp = Chomp1d(padding)
        self.causal = causal

    def forward(self, x):
        """Convolve ``x`` and, if causal, chomp the trailing ``padding`` steps."""
        convolved = self.conv(x)
        if not self.causal:
            return convolved
        return self.chomp(convolved)
class TemporalBlock(nn.Module):
    """Residual block: two (Conv -> ReLU -> Dropout) stages plus a skip path.

    The skip path is the input itself when ``n_inputs == n_outputs``;
    otherwise a non-causal kernel-size-1 ``Conv`` maps the input to
    ``n_outputs`` channels. The sum of both paths goes through a final ReLU.
    """

    def __init__(
        self,
        n_inputs,
        n_outputs,
        kernel_size,
        stride,
        dilation,
        padding,
        dropout=0.2,
    ):
        super().__init__()
        # Both convolution stages share the same geometry.
        conv_kwargs = dict(
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
        )

        self.conv1 = Conv(n_inputs, n_outputs, **conv_kwargs)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = Conv(n_outputs, n_outputs, **conv_kwargs)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(
            self.conv1,
            self.relu1,
            self.dropout1,
            self.conv2,
            self.relu2,
            self.dropout2,
        )

        # Channel-matching projection for the residual, only when needed.
        if n_inputs != n_outputs:
            self.downsample = Conv(
                n_inputs,
                n_outputs,
                1,
                stride=1,
                padding=0,
                dilation=1,
                causal=False,
            )
        else:
            self.downsample = None
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the stacked layers on ``x`` and add the (projected) input."""
        hidden = x
        for layer in self.net:
            hidden = layer(hidden)
        shortcut = self.downsample(x) if self.downsample is not None else x
        return self.relu(hidden + shortcut)
class TemporalConvNet(nn.Module):
    """
    Stack of ``TemporalBlock`` layers with dilation doubling at each level.

    Level ``i`` uses dilation ``2 ** i`` and padding
    ``(kernel_size - 1) * 2 ** i``; its input width is ``num_inputs`` for the
    first level and the previous entry of ``num_channels`` afterwards.
    ``num_embeddings`` and ``embedding_dim`` are accepted but not used here.

    See:
    - https://arxiv.org/pdf/1803.01271.pdf
    - https://github.com/locuslab/TCN
    """

    def __init__(
        self,
        num_inputs,
        num_channels,
        num_embeddings=0,
        kernel_size=2,
        dropout=0.2,
        embedding_dim=2,
    ):
        super().__init__()
        blocks = []
        for level, out_channels in enumerate(num_channels):
            dilation = 2 ** level
            in_channels = num_inputs if level == 0 else num_channels[level - 1]
            blocks.append(
                TemporalBlock(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=1,
                    dilation=dilation,
                    padding=(kernel_size - 1) * dilation,
                    dropout=dropout,
                )
            )

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Feed ``x`` through every block in order and return the result."""
        hidden = x
        for block in self.network:
            hidden = block(hidden)
        return hidden
+11
View File
@@ -1,6 +1,9 @@
from pathlib import Path
import torch
import xarray as xr
import logging
# Module-level logger.
# NOTE(review): it is named after this file's path (__file__); __name__ is the
# usual choice for logger hierarchies — confirm nothing relies on the name.
logger = logging.getLogger(__file__)
# Parent of the directory that contains this file (presumably the project
# root — verify against the repository layout).
project_dir = Path(__file__).parent.parent
def to_numpy(x):
@@ -12,3 +15,11 @@ def to_numpy(x):
def mask_upper_triangular(N, device):
    """Causal attention mask.

    Returns an ``N x N`` boolean tensor on ``device`` that is True strictly
    above the main diagonal and False elsewhere.
    """
    ones = torch.ones(N, N)
    strictly_upper = torch.triu(ones, diagonal=1)
    return strictly_upper.to(device).bool()
def dset_to_nc(dset, f, engine="netcdf4", compression=None):
    """Write an xarray object to a netCDF file, applying per-variable encoding.

    Args:
        dset: ``xr.Dataset`` or ``xr.DataArray``. A DataArray is first wrapped
            in a Dataset under the variable name ``"data"``.
        f: output location, as a ``str`` or ``pathlib.Path``.
        engine: passed through to ``to_netcdf``.
        compression: encoding options applied to every data variable.
            Defaults to ``{"zlib": True}``.
    """
    if compression is None:
        compression = {"zlib": True}
    # Accept plain strings too; ``.name`` / ``.stat()`` below need a Path.
    path = Path(f)
    if isinstance(dset, xr.DataArray):
        dset = dset.to_dataset(name="data")
    # Give each variable its own copy so the caller's dict is never shared.
    encoding = {name: dict(compression) for name in dset.data_vars}
    logger.info("saving to %s", path)
    dset.to_netcdf(path, engine=engine, encoding=encoding)
    logger.info("Wrote %s size=%s M", path.name, path.stat().st_size / 1e6)