data images

2026-06-27 16:46:54 +08:00 · 2020-11-01 16:30:13 +08:00
parent 4aa8b3a52e
commit 6aca4a7e1e
8 changed files with 25 additions and 5 deletions
@@ -66,7 +66,21 @@ Using sequence to sequence interfaces for timeseries regression
  </tbody>
 </table>

-Project Organization
+## Datasets
+
+To ensure a robust score, we use multiple multivariate regression timeseries datasets.
+
+For more details, see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb).
+
+![](reports/figures/data_batches_appliances.png)
+![](reports/figures/data_batches_currents.png)
+![](reports/figures/data_batches_gas.png)
+![](reports/figures/data_batches_pm25.png)
+![](reports/figures/data_batches_traffice.png)
+
+
+
+## Project Organization
 ------------

    ├── LICENSE
@@ -84,7 +98,7 @@ Project Organization
    ├── reports            <- Generated analysis as HTML, PDF, LaTeX, etc.
    │   └── figures        <- Generated graphics and figures to be used in reporting
    │
-    ├── requirements.txt   <- The requirements file for reproducing the analysis environment, e.g.
+    ├── requirements       <- The requirements folder for reproducing the analysis environment, e.g.
    │                         generated with `pip freeze > requirements.txt`
    │
    ├── setup.py           <- makes project pip installable (pip install -e .) so src can be imported
@@ -96,3 +110,7 @@ Project Organization
 --------

 <p><small>Project based on the <a target="_blank" href="https://drivendata.github.io/cookiecutter-data-science/">cookiecutter data science project template</a>. #cookiecutterdatascience</small></p>
+
+```python
+
+```
@@ -82,6 +82,9 @@ class RegressionForecastData:
    def __repr__(self):
        return f'<{type(self).__name__} {self.df.shape if (self.df is not None) else None}>'

+    def __len__(self):
+        return len(self.df.dropna(subset=self.columns_target))
+
 class GasSensor(RegressionForecastData):
    """
    See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
@@ -330,7 +333,6 @@ class IMOSCurrentsVel(RegressionForecastData):
        # made in previous notebook
        xd = xr.load_dataset(outfile)
        df = xd.to_dataframe()
-        df['SPD'] = np.sqrt(df.VCUR**2 + df.UCUR**2)
        df = df[['VCUR',  'UCUR', 'WCUR', 'TEMP',  'DEPTH', 'M2',
       'S2', 'N2', 'K2', 'K1', 'O1', 'P1', 'Q1', 'M4', 'M6', 'S4', 'MK3', 'MM',
       'SSA', 'SA', 'SPD']]
@@ -340,6 +342,6 @@ class IMOSCurrentsVel(RegressionForecastData):
        has_past = df.SPD.isna().rolling(48).sum()<5
        df = df[has_past]

-        df = df.resample('10T').first()
+        df = df.resample('30T').mean()

        return df
@@ -31,7 +31,7 @@ class Seq2SeqDataSet(torch.utils.data.Dataset):
        assert df.index.freq is not None, 'should have freq'
        assert_no_objects(df)

-        self.freq = df.index.freq
+        self.freq = df.index.freq.freqstr
        self.df = df.dropna(subset=columns_target).ffill()

        self.window_past = window_past