This commit is contained in:
wassname
2020-11-02 07:26:26 +08:00
parent 6aca4a7e1e
commit 77f9bc7e67
4 changed files with 1974 additions and 222 deletions
+12
View File
@@ -73,9 +73,21 @@ To ensure a robust score we use multiple multivariate regression timeseries.
For more see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb)
![](reports/figures/data_batches_appliances.png)
30-minute current speed at the Two Rocks 200 m mooring. Has tidal periods as extra features.
![](reports/figures/data_batches_currents.png)
A metal oxide (MOX) gas sensor exposed for 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
![](reports/figures/data_batches_gas.png)
Hourly PM2.5 data from the US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
![](reports/figures/data_batches_pm25.png)
Hourly Minneapolis-St Paul, MN traffic volume for westbound I-94. Includes weather and holiday features from 2012-2018.
![](reports/figures/data_batches_traffice.png)
File diff suppressed because one or more lines are too long
+28 -17
View File
@@ -33,6 +33,7 @@ import matplotlib.pyplot as plt
from pathlib import Path
from tqdm.auto import tqdm
from IPython.display import display, HTML
# -
import warnings
warnings.simplefilter('once')
@@ -62,8 +63,8 @@ hv.renderer('bokeh').theme = ggplot_theme
# print(f'using {device}')
window_past = 48*2
window_future = 48*2
batch_size = 128
window_future = 48
batch_size = 4
datasets_root = Path('../data/processed/')
# -
@@ -77,11 +78,24 @@ from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction,
datasets = [IMOSCurrentsVel, BejingPM25, GasSensor, AppliancesEnergyPrediction, MetroInterstateTraffic, ]
datasets
# -
# Summarise every dataset: heading, class docstring, column roles, then the frame.
for dataset in datasets:
    d = dataset(datasets_root)
    display(HTML(f"<h3>{dataset.__name__}</h3>"))
    print(d.__doc__)
    print('columns_forecast', d.columns_forecast)
    print('columns_past', d.columns_past)
    print('columns_target', d.columns_target)
    # A bare `print` only references the function and outputs nothing;
    # call it so a blank line separates the column summary from the frame.
    print()
    display(d.df)
# View train, test, val splits
l = hv.Layout()
for dataset in datasets:
d = dataset(datasets_root)
p = dynspread(
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
cmap='red'))
@@ -91,10 +105,11 @@ for dataset in datasets:
p *= dynspread(
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
cmap='blue'))
p = p.opts(title=f"{dataset}")
p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
display(p)
# +
# plot a batch
def plot_batch_y(ds, i):
@@ -106,7 +121,7 @@ def plot_batch_y(ds, i):
p *= hv.VLine(now).relabel('now').opts(color='red')
return p
def plot_batches_y(dataset):
def plot_batches_y(dataset, window_past=window_past, window_future=window_future):
ds_name = type(dataset).__name__
opts=dict(width=200, height=100, xaxis=None, yaxis=None)
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
@@ -119,22 +134,23 @@ def plot_batches_y(dataset):
l += plot_batch_y(ds_train, i).opts(title=f'train {i}', **opts)
l += plot_batch_y(ds_val, i).opts(title=f'val {i}', **opts)
l += plot_batch_y(ds_test, i).opts(title=f'test {i}', **opts)
return l.opts(shared_axes=False, toolbar='right', title=ds_name).cols(3)
return l.opts(shared_axes=False, toolbar='right', title=f"{ds_name} freq={d.df.index.freq.freqstr}").cols(3)
# +
# -
# View train, test, val splits
for dataset in datasets:
ds_name = type(dataset).__name__
d = dataset(datasets_root)
print(d)
display(plot_batches_y(d))
# +
def plot_batch_x(ds, i):
"""Plot input features"""
x_past, y_past, x_future, y_future = ds.get_rows(10)
x_past, y_past, x_future, y_future = ds.get_rows(i)
x = pd.concat([x_past, x_future])
p = hv.NdOverlay({
col: hv.Curve(x[col]) for col in x.columns
@@ -154,15 +170,10 @@ def plot_batches_x(d):
# -
# View train, test, val splits
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
window_future=window_future)
# View input columns
for dataset in datasets:
d = dataset(datasets_root)
display(plot_batches_x(d))
+14 -3
View File
@@ -87,6 +87,8 @@ class RegressionForecastData:
class GasSensor(RegressionForecastData):
"""
A metal oxide (MOX) gas sensor exposed for 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
"""
@@ -120,6 +122,8 @@ class GasSensor(RegressionForecastData):
class MetroInterstateTraffic(RegressionForecastData):
"""
Hourly traffic volume for Interstate 94 (I-94) in the U.S. state of Minnesota. Includes weather and holiday features from 2012-2018.
See: https://archive.ics.uci.edu/ml/datasets/Metro+Interstate+Traffic+Volume
"""
@@ -158,6 +162,8 @@ class MetroInterstateTraffic(RegressionForecastData):
class AppliancesEnergyPrediction(RegressionForecastData):
"""
Appliances energy use in a low energy building.
See: https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
"""
@@ -193,7 +199,12 @@ class AppliancesEnergyPrediction(RegressionForecastData):
class BejingPM25(RegressionForecastData):
"""
See: http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
PM2.5 data from the US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
See:
- http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
- https://en.wikipedia.org/wiki/Particulates
"""
columns_target = ['log_pm2.5']
@@ -305,11 +316,11 @@ def get_current_timeseries(
class IMOSCurrentsVel(RegressionForecastData):
"""
Current Speed at ANMN Two Rocks, WA, 204m mooring
Current Speed at Two Rocks, Western Australia, with a water depth of 200 m. The mooring is located at Lat -31.719 Lon 115.03. Has tidal periods as features.
see:
- https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/bbfc20d3-0e98-40a8-bd8a-3f7717eafb6d
- http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/
from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c
and http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html
And https://en.wikipedia.org/wiki/Theory_of_tides
"""