mirror of
https://github.com/wassname/DeepTime.git
synced 2026-06-28 01:13:11 +08:00
183 lines
5.6 KiB
Python
183 lines
5.6 KiB
Python
from abc import ABC, abstractmethod
|
|
from typing import Optional, List, Union
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
class TimeFeature(ABC):
    """Abstract base class for a calendar feature derived from a DatetimeIndex.

    Subclasses implement ``__call__`` to extract a raw, zero-based calendar
    field from the index and ``_max_val`` to report the largest value that
    field can take.

    :param normalise: If True, raw values are divided by ``_max_val`` and
        mapped linearly onto ``[a, b]``. If False, raw values are returned as
        integers in the field's original range and ``a``/``b`` are ignored.
    :param a: Lower bound of the normalised feature.
    :param b: Upper bound of the normalised feature.
    """

    def __init__(self, normalise: bool, a: float, b: float):
        self.normalise = normalise
        self.a = a
        self.b = b

    @abstractmethod
    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        """Return the feature value for every timestamp in ``idx``."""
        ...

    @property
    @abstractmethod
    def _max_val(self) -> float:
        """Largest raw value the feature can take (the normalisation divisor)."""
        ...

    @property
    def max_val(self) -> float:
        """Divisor applied to raw values: ``_max_val`` when normalising, else 1.0."""
        return self._max_val if self.normalise else 1.0

    def scale(self, val: np.ndarray) -> np.ndarray:
        """Map values from ``[0, 1]`` linearly onto ``[a, b]``."""
        return val * (self.b - self.a) + self.a

    def process(self, val: np.ndarray) -> np.ndarray:
        """Turn raw calendar values into the final feature array.

        :param val: Raw zero-based calendar values (array-like; pandas Index
            objects are accepted and converted).
        :return: Float array scaled to ``[a, b]`` when ``normalise`` is set,
            otherwise an integer array in the original feature range.
        """
        # The datetime accessors hand back pandas Index objects; coerce so the
        # result matches the annotated np.ndarray return type.
        raw = np.asarray(val)
        if not self.normalise:
            # Unnormalised features stay in their original integer range, so
            # the [a, b] mapping must not be applied here: doing so would
            # shift and truncate the values whenever (a, b) != (0, 1).
            return raw.astype(int)
        return self.scale(raw / self.max_val)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(normalise={self.normalise}, a={self.a}, b={self.b})"
|
|
|
|
|
|
class SecondOfMinute(TimeFeature):
    """Second within the minute; raw (unnormalised) values span [0, 59]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 59.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        seconds = idx.second
        return self.process(seconds)
|
|
|
|
|
|
class MinuteOfHour(TimeFeature):
    """Minute within the hour; raw (unnormalised) values span [0, 59]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 59.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        minutes = idx.minute
        return self.process(minutes)
|
|
|
|
|
|
class HourOfDay(TimeFeature):
    """Hour within the day; raw (unnormalised) values span [0, 23]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 23.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        hours = idx.hour
        return self.process(hours)
|
|
|
|
|
|
class DayOfWeek(TimeFeature):
    """Day of week, unnormalised: [0, 6]"""

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        # pandas already numbers weekdays from zero, so no offset is needed.
        return self.process(idx.dayofweek)

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 6.0
|
|
|
|
|
|
class DayOfMonth(TimeFeature):
    """Day within the month, shifted to start at zero; raw values span [0, 30]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 30.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        # Subtract one so the first day of the month maps to 0.
        zero_based_day = idx.day - 1
        return self.process(zero_based_day)
|
|
|
|
|
|
class DayOfYear(TimeFeature):
    """Day within the year, shifted to start at zero; raw values span [0, 365]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 365.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        # Subtract one so the first day of the year maps to 0.
        zero_based_day = idx.dayofyear - 1
        return self.process(zero_based_day)
|
|
|
|
|
|
class WeekOfYear(TimeFeature):
    """ISO week within the year, shifted to start at zero; raw values span [0, 52]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 52.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        iso_week = idx.isocalendar().week
        # Rebuild as a plain integer Index, then subtract one so the first
        # week of the year maps to 0.
        zero_based_week = pd.Index(iso_week, dtype=int) - 1
        return self.process(zero_based_week)
|
|
|
|
class MonthOfYear(TimeFeature):
    """Month within the year, shifted to start at zero; raw values span [0, 11]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 11.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        # Subtract one so the first month of the year maps to 0.
        zero_based_month = idx.month - 1
        return self.process(zero_based_month)
|
|
|
|
|
|
class QuarterOfYear(TimeFeature):
    """Quarter within the year, shifted to start at zero; raw values span [0, 3]."""

    @property
    def _max_val(self):
        # Largest raw value; the base class divides by it when normalising.
        return 3.0

    def __call__(self, idx: pd.DatetimeIndex) -> np.ndarray:
        # Subtract one so the first quarter of the year maps to 0.
        zero_based_quarter = idx.quarter - 1
        return self.process(zero_based_quarter)
|
|
|
|
|
|
# Lookup table from a feature's name to the TimeFeature subclass implementing
# it. get_time_features uses it to resolve an explicit list of feature names.
str_to_feat = {
    "SecondOfMinute": SecondOfMinute,
    "MinuteOfHour": MinuteOfHour,
    "HourOfDay": HourOfDay,
    "DayOfWeek": DayOfWeek,
    "DayOfMonth": DayOfMonth,
    "DayOfYear": DayOfYear,
    "WeekOfYear": WeekOfYear,
    "MonthOfYear": MonthOfYear,
    "QuarterOfYear": QuarterOfYear,
}
|
|
|
|
|
|
# Lookup table from a frequency key to the list of TimeFeature subclasses used
# for data at that frequency. Each finer frequency keeps every feature of the
# coarser one before it and appends its own.
# NOTE(review): the keys look like lower-cased pandas offset aliases
# (quarter, month, week, day, hour, minute, second) - confirm against callers.
freq_to_feats = {
    "q": [QuarterOfYear],
    "m": [QuarterOfYear, MonthOfYear],
    "w": [QuarterOfYear, MonthOfYear, WeekOfYear],
    "d": [
        QuarterOfYear, MonthOfYear, WeekOfYear,
        DayOfYear, DayOfMonth, DayOfWeek,
    ],
    "h": [
        QuarterOfYear, MonthOfYear, WeekOfYear,
        DayOfYear, DayOfMonth, DayOfWeek,
        HourOfDay,
    ],
    "t": [
        QuarterOfYear, MonthOfYear, WeekOfYear,
        DayOfYear, DayOfMonth, DayOfWeek,
        HourOfDay, MinuteOfHour,
    ],
    "s": [
        QuarterOfYear, MonthOfYear, WeekOfYear,
        DayOfYear, DayOfMonth, DayOfWeek,
        HourOfDay, MinuteOfHour, SecondOfMinute,
    ],
}
|
|
|
|
|
|
def get_time_features(dates: pd.DatetimeIndex, normalise: bool, a: Optional[float] = 0., b: Optional[float] = 1.,
                      features: Optional[Union[str, List[str]]] = None) -> np.ndarray:
    """
    Returns a numpy array of date/time features based on either frequency or directly specifying a list of features.

    :param dates: DatetimeIndex object of shape (time,)
    :param normalise: Whether to normalise feature between [a, b]. If not, return as an int in the original feature range.
    :param a: Lower bound of feature
    :param b: Upper bound of feature
    :param features: Frequency string used to obtain list of TimeFeatures (a key of ``freq_to_feats``), or directly
        a list of names of TimeFeatures (keys of ``str_to_feat``). The default ``None`` is rejected with ValueError,
        so a value must always be supplied.
    :return: np array of date/time features of shape (time, n_feats); shape (time, 0) for an empty list
    :raises AssertionError: if a feature name or the frequency string is not recognised
    :raises ValueError: if ``features`` is neither a list nor a str
    """
    # Validation is raised explicitly rather than via `assert` statements so it
    # still runs under `python -O`; AssertionError is kept so the exception
    # type seen by existing callers does not change.
    if isinstance(features, list):
        if any(name not in str_to_feat for name in features):
            raise AssertionError(f"items in list should be one of {[*str_to_feat.keys()]}")
        feature_classes = [str_to_feat[name] for name in features]
    elif isinstance(features, str):
        if features not in freq_to_feats:
            raise AssertionError(f"features should be one of {[*freq_to_feats.keys()]}")
        feature_classes = freq_to_feats[features]
    else:
        raise ValueError(f"features should be a list or str, not a {type(features)}")

    # One 1-D array of length `time` per requested feature.
    columns = [feature_cls(normalise, a, b)(dates) for feature_cls in feature_classes]

    if not columns:
        # np.stack rejects an empty sequence, so build the (time, 0) result directly.
        return np.empty((dates.shape[0], 0))
    return np.stack(columns, axis=1)
|