diff --git a/README.md b/README.md index bc213bc..221a239 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,21 @@ Using sequence to sequence interfaces for timeseries regression -Project Organization +## Datasets + +To ensure a robust score we use multiple multivariate regression timeseries. + +For more see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb) + +![](reports/figures/data_batches_appliances.png) +![](reports/figures/data_batches_currents.png) +![](reports/figures/data_batches_gas.png) +![](reports/figures/data_batches_pm25.png) +![](reports/figures/data_batches_traffic.png) + + + +## Project Organization ------------ ├── LICENSE @@ -84,7 +98,7 @@ Project Organization ├── reports <- Generated analysis as HTML, PDF, LaTeX, etc. │   └── figures <- Generated graphics and figures to be used in reporting │ - ├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g. + ├── requirements <- The requirements folder for reproducing the analysis environment, e.g. │ generated with `pip freeze > requirements.txt` │ ├── setup.py <- makes project pip installable (pip install -e .) so src can be imported @@ -96,3 +110,7 @@ Project Organization --------

Project based on the cookiecutter data science project template. #cookiecutterdatascience

+ +```python + +``` diff --git a/reports/figures/data_batches_appliances.png b/reports/figures/data_batches_appliances.png new file mode 100644 index 0000000..9bd42d7 Binary files /dev/null and b/reports/figures/data_batches_appliances.png differ diff --git a/reports/figures/data_batches_currents.png b/reports/figures/data_batches_currents.png new file mode 100644 index 0000000..3703f3c Binary files /dev/null and b/reports/figures/data_batches_currents.png differ diff --git a/reports/figures/data_batches_gas.png b/reports/figures/data_batches_gas.png new file mode 100644 index 0000000..92c41e0 Binary files /dev/null and b/reports/figures/data_batches_gas.png differ diff --git a/reports/figures/data_batches_pm25.png b/reports/figures/data_batches_pm25.png new file mode 100644 index 0000000..0ccf2e8 Binary files /dev/null and b/reports/figures/data_batches_pm25.png differ diff --git a/reports/figures/data_batches_traffic.png b/reports/figures/data_batches_traffic.png new file mode 100644 index 0000000..ad114e0 Binary files /dev/null and b/reports/figures/data_batches_traffic.png differ diff --git a/seq2seq_time/data/data.py b/seq2seq_time/data/data.py index c9620d5..e001ff4 100644 --- a/seq2seq_time/data/data.py +++ b/seq2seq_time/data/data.py @@ -82,6 +82,9 @@ class RegressionForecastData: def __repr__(self): return f'<{type(self).__name__} {self.df.shape if (self.df is not None) else None}>' + def __len__(self): + return len(self.df.dropna(subset=self.columns_target)) + class GasSensor(RegressionForecastData): """ See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation @@ -330,7 +333,6 @@ class IMOSCurrentsVel(RegressionForecastData): # made in previous notebook xd = xr.load_dataset(outfile) df = xd.to_dataframe() - df['SPD'] = np.sqrt(df.VCUR**2 + df.UCUR**2) df = df[['VCUR', 'UCUR', 'WCUR', 'TEMP', 'DEPTH', 'M2', 'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1', 'M4', 'M6', 'S4', 'MK3', 'MM', 'SSA', 'SA', 'SPD']] @@ -340,6 +342,6 @@ class 
IMOSCurrentsVel(RegressionForecastData): has_past = df.SPD.isna().rolling(48).sum()<5 df = df[has_past] - df = df.resample('10T').first() + df = df.resample('30T').mean() return df diff --git a/seq2seq_time/data/dataset.py b/seq2seq_time/data/dataset.py index 7065d89..7f2ff7c 100644 --- a/seq2seq_time/data/dataset.py +++ b/seq2seq_time/data/dataset.py @@ -31,7 +31,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset): assert df.index.freq is not None, 'should have freq' assert_no_objects(df) - self.freq = df.index.freq + self.freq = df.index.freq.freqstr self.df = df.dropna(subset=columns_target).ffill() self.window_past = window_past