first commit

This commit is contained in:
gorold
2022-07-13 16:03:34 +08:00
commit c2a9fa042c
67 changed files with 2693 additions and 0 deletions
View File
+59
View File
@@ -0,0 +1,59 @@
from typing import Optional, Dict
import logging
from os.path import join
import gin
import torch
from torch.utils.tensorboard import SummaryWriter
@gin.configurable()
class Checkpoint:
def __init__(self,
checkpoint_dir: str,
patience: Optional[int] = 7,
delta: Optional[float] = 0.):
self.checkpoint_dir = checkpoint_dir
self.model_path = join(checkpoint_dir, 'model.pth')
# early stopping
self.patience = patience
self.counter = 0
self.best_loss = float('inf')
self.early_stop = False
self.delta = delta
# logging
self.summary_writer = SummaryWriter(log_dir=checkpoint_dir)
def __call__(self,
epoch: int,
model: torch.nn.Module,
scalars: Optional[Dict[str, float]] = None):
for name, value in scalars.items():
# logging
self.summary_writer.add_scalar(name, value, epoch)
# early stopping
if name == 'Loss/Val':
val_loss = value
if val_loss <= self.best_loss + self.delta:
logging.info(
f"Validation loss decreased ({self.best_loss:.3f} --> {val_loss:.3f}). Saving model ...")
torch.save(model.state_dict(), self.model_path)
self.best_loss = val_loss
self.counter = 0
else:
self.counter += 1
logging.info(f"Validation loss increased ({self.best_loss:.3f} --> {val_loss:.3f}). "
f"Early stopping counter: {self.counter} out of {self.patience}")
if self.counter >= self.patience >= 0:
self.early_stop = True
self.summary_writer.flush()
def close(self, scores: Optional[Dict[str, float]] = None):
if scores is not None:
for name, value in scores.items():
self.summary_writer.add_scalar(name, value)
self.summary_writer.close()
+15
View File
@@ -0,0 +1,15 @@
from typing import Optional, Callable
from functools import partial
import torch
import torch.nn.functional as F
from torch import Tensor
def get_loss_fn(loss_name: str,
delta: Optional[float] = 1.0,
beta: Optional[float] = 1.0) -> Callable:
return {'mse': F.mse_loss,
'mae': F.l1_loss,
'huber': partial(F.huber_loss, delta=delta),
'smooth_l1': partial(F.smooth_l1_loss, beta=beta)}[loss_name]
+39
View File
@@ -0,0 +1,39 @@
import numpy as np
def rse(pred, true):
return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
def corr(pred, true):
u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
return (u / d).mean(-1)
def mae(pred, true):
return np.mean(np.abs(pred - true))
def mse(pred, true):
return np.mean((pred - true) ** 2)
def rmse(pred, true):
return np.sqrt(mse(pred, true))
def mape(pred, true):
return np.mean(np.abs((pred - true) / true))
def mspe(pred, true):
return np.mean(np.square((pred - true) / true))
def calc_metrics(pred, true):
return {'mae': mae(pred, true),
'mse': mse(pred, true),
'rmse': rmse(pred, true),
'mape': mape(pred, true),
'mspe': mspe(pred, true)}
+54
View File
@@ -0,0 +1,54 @@
from typing import Optional, Tuple
import numpy as np
import torch
from torch import Tensor
from einops import reduce
def default_device() -> torch.device:
"""
PyTorch default device is GPU when available, CPU otherwise.
:return: Default device.
"""
return torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def to_tensor(array: np.ndarray, to_default_device: Optional[bool] = True) -> Tensor:
"""
Convert numpy array to tensor on default device.
:param array: Numpy array to convert.
:param to_default_device Place tensor on default device or not.
:return: PyTorch tensor, optionally on default device.
"""
if to_default_device:
return torch.as_tensor(array, dtype=torch.float32).to(default_device())
def divide_no_nan(a, b):
"""
a/b where the resulted NaN or Inf are replaced by 0.
"""
mask = b == .0
b[mask] = 1.
result = a / b
result[mask] = .0
result[result != result] = .0
result[result == np.inf] = .0
return result
def scale(x: Tensor,
scaling_factor: Optional[Tensor] = None) -> Tuple[Tensor, Tensor]:
if scaling_factor is not None:
x = x / scaling_factor
return x, scaling_factor
scaling_factor = reduce(torch.abs(x).data, 'b t d -> b 1 d', 'mean')
scaling_factor[scaling_factor == 0.0] = 1.0
x = x / scaling_factor
return x, scaling_factor
def descale(forecast: Tensor, scaling_factor: Tensor) -> Tensor:
return forecast * scaling_factor
+182
View File
@@ -0,0 +1,182 @@
from abc import ABC, abstractmethod
from typing import Optional, List, Union
import numpy as np
import pandas as pd
class TimeFeature(ABC):
"""Abstract class for time features"""
def __init__(self, normalise: bool, a: float, b: float):
self.normalise = normalise
self.a = a
self.b = b
@abstractmethod
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
...
@property
@abstractmethod
def _max_val(self) -> float:
...
@property
def max_val(self) -> float:
return self._max_val if self.normalise else 1.0
def scale(self, val: np.ndarray) -> np.ndarray:
return val * (self.b - self.a) + self.a
def process(self, val: np.ndarray) -> np.ndarray:
features = self.scale(val / self.max_val)
if self.normalise:
return features
return features.astype(int)
def __repr__(self) -> str:
return f"{self.__class__.__name__}(normalise={self.normalise}, a={self.a}, b={self.b})"
class SecondOfMinute(TimeFeature):
"""Second of minute, unnormalised: [0, 59]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.second)
@property
def _max_val(self):
return 59.0
class MinuteOfHour(TimeFeature):
"""Minute of hour, unnormalised: [0, 59]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.minute)
@property
def _max_val(self):
return 59.0
class HourOfDay(TimeFeature):
"""Hour of day, unnormalised: [0, 23]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.hour)
@property
def _max_val(self):
return 23.0
class DayOfWeek(TimeFeature):
"""Hour of day, unnormalised: [0, 6]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.dayofweek)
@property
def _max_val(self):
return 6.0
class DayOfMonth(TimeFeature):
"""Day of month, unnormalised: [0, 30]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.day - 1)
@property
def _max_val(self):
return 30.0
class DayOfYear(TimeFeature):
"""Day of year, unnormalised: [0, 365]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.dayofyear - 1)
@property
def _max_val(self):
return 365.0
class WeekOfYear(TimeFeature):
"""Week of year, unnormalised: [0, 52]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(pd.Index(idx.isocalendar().week, dtype=int) - 1)
@property
def _max_val(self):
return 52.0
class MonthOfYear(TimeFeature):
"""Month of year, unnormalised: [0, 11]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.month - 1)
@property
def _max_val(self):
return 11.0
class QuarterOfYear(TimeFeature):
"""Quarter of year, unnormalised: [0, 3]"""
def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
return self.process(idx.quarter - 1)
@property
def _max_val(self):
return 3.0
str_to_feat = {
# dictionary mapping name to TimeFeature function
'SecondOfMinute': SecondOfMinute,
'MinuteOfHour': MinuteOfHour,
'HourOfDay': HourOfDay,
'DayOfWeek': DayOfWeek,
'DayOfMonth': DayOfMonth,
'DayOfYear': DayOfYear,
'WeekOfYear': WeekOfYear,
'MonthOfYear': MonthOfYear,
'QuarterOfYear': QuarterOfYear,
}
freq_to_feats = {
# dictionary mapping frequency to list of TimeFeature functions
'q': [QuarterOfYear],
'm': [QuarterOfYear, MonthOfYear],
'w': [QuarterOfYear, MonthOfYear, WeekOfYear],
'd': [QuarterOfYear, MonthOfYear, WeekOfYear, DayOfYear, DayOfMonth, DayOfWeek],
'h': [QuarterOfYear, MonthOfYear, WeekOfYear, DayOfYear, DayOfMonth, DayOfWeek, HourOfDay],
't': [QuarterOfYear, MonthOfYear, WeekOfYear, DayOfYear, DayOfMonth, DayOfWeek, HourOfDay, MinuteOfHour],
's': [QuarterOfYear, MonthOfYear, WeekOfYear, DayOfYear, DayOfMonth, DayOfWeek, HourOfDay, MinuteOfHour, SecondOfMinute],
}
def get_time_features(dates: pd.DatetimeIndex, normalise: bool, a: Optional[float] = 0., b: Optional[float] = 1.,
features: Optional[Union[str, List[str]]] = None) -> np.ndarray:
"""
Returns a numpy array of date/time features based on either frequency or directly specifying a list of features.
:param dates: DatetimeIndex object of shape (time,)
:param normalise: Whether to normalise feature between [a, b]. If not, return as an int in the original feature range.
:param a: Lower bound of feature
:param b: Upper bound of feature
:param features: Frequency string used to obtain list of TimeFeatures, or directly a list of names of TimeFeatures
:return: np array of date/time features of shape (time, n_feats)
"""
if isinstance(features, list):
assert all([feat in str_to_feat.keys() for feat in features]), \
f"items in list should be one of {[*str_to_feat.keys()]}"
features = [str_to_feat[feat] for feat in features]
elif isinstance(features, str):
assert features in freq_to_feats.keys(), \
f"features should be one of {[*freq_to_feats.keys()]}"
features = freq_to_feats[features]
else:
raise ValueError(f"features should be a list or str, not a {type(features)}")
features = [feat(normalise, a, b)(dates) for feat in features]
if len(features) == 0:
return np.empty((dates.shape[0], 0))
return np.stack(features, axis=1)