mirror of
https://github.com/wassname/seq2seq-time.git
synced 2026-06-27 16:31:46 +08:00
datasets
This commit is contained in:
@@ -73,9 +73,21 @@ To ensure a robust score we use multiple multivariate regression timeseries.
|
||||
For more see [notebooks/01.0-mc-datasets.ipynb](notebooks/01.0-mc-datasets.ipynb)
|
||||
|
||||

|
||||
|
||||
30-minute current speed at the Two Rocks 200m mooring. Has tidal periods as extra features.
|
||||
|
||||

|
||||
|
||||
A metal oxide (MOX) gas sensor exposed for 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
|
||||
|
||||

|
||||
|
||||
Hourly PM2.5 data from the US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
|
||||
|
||||

|
||||
|
||||
Hourly Minneapolis-St Paul, MN traffic volume for westbound I-94. Includes weather and holiday features from 2012-2018.
|
||||
|
||||

|
||||
|
||||
|
||||
|
||||
+1920
-202
File diff suppressed because one or more lines are too long
@@ -33,6 +33,7 @@ import matplotlib.pyplot as plt
|
||||
|
||||
from pathlib import Path
|
||||
from tqdm.auto import tqdm
|
||||
from IPython.display import display, HTML
|
||||
# -
|
||||
import warnings
|
||||
warnings.simplefilter('once')
|
||||
@@ -62,8 +63,8 @@ hv.renderer('bokeh').theme = ggplot_theme
|
||||
# print(f'using {device}')
|
||||
|
||||
window_past = 48*2
|
||||
window_future = 48*2
|
||||
batch_size = 128
|
||||
window_future = 48
|
||||
batch_size = 4
|
||||
datasets_root = Path('../data/processed/')
|
||||
# -
|
||||
|
||||
@@ -77,11 +78,24 @@ from seq2seq_time.data.data import IMOSCurrentsVel, AppliancesEnergyPrediction,
|
||||
datasets = [IMOSCurrentsVel, BejingPM25, GasSensor, AppliancesEnergyPrediction, MetroInterstateTraffic, ]
|
||||
datasets
|
||||
# -
|
||||
|
||||
|
||||
for dataset in datasets:
|
||||
d = dataset(datasets_root)
|
||||
display(HTML(f"<h3>{dataset.__name__}</h3>"))
|
||||
print(d.__doc__)
|
||||
print('columns_forecast', d.columns_forecast)
|
||||
print('columns_past', d.columns_past)
|
||||
print('columns_target', d.columns_target)
|
||||
print
|
||||
display(d.df)
|
||||
|
||||
# View train, test, val splits
|
||||
l = hv.Layout()
|
||||
for dataset in datasets:
|
||||
d = dataset(datasets_root)
|
||||
|
||||
|
||||
p = dynspread(
|
||||
datashade(hv.Scatter(d.df_train[d.columns_target[0]]),
|
||||
cmap='red'))
|
||||
@@ -91,10 +105,11 @@ for dataset in datasets:
|
||||
p *= dynspread(
|
||||
datashade(hv.Scatter(d.df_test[d.columns_target[0]]),
|
||||
cmap='blue'))
|
||||
p = p.opts(title=f"{dataset}")
|
||||
p = p.opts(title=f"{dataset.__name__}, n={len(d)}, freq={d.df.index.freq.freqstr}")
|
||||
display(p)
|
||||
|
||||
|
||||
|
||||
# +
|
||||
# plot a batch
|
||||
def plot_batch_y(ds, i):
|
||||
@@ -106,7 +121,7 @@ def plot_batch_y(ds, i):
|
||||
p *= hv.VLine(now).relabel('now').opts(color='red')
|
||||
return p
|
||||
|
||||
def plot_batches_y(dataset):
|
||||
def plot_batches_y(dataset, window_past=window_past, window_future=window_future):
|
||||
ds_name = type(dataset).__name__
|
||||
opts=dict(width=200, height=100, xaxis=None, yaxis=None)
|
||||
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
|
||||
@@ -119,22 +134,23 @@ def plot_batches_y(dataset):
|
||||
l += plot_batch_y(ds_train, i).opts(title=f'train {i}', **opts)
|
||||
l += plot_batch_y(ds_val, i).opts(title=f'val {i}', **opts)
|
||||
l += plot_batch_y(ds_test, i).opts(title=f'test {i}', **opts)
|
||||
return l.opts(shared_axes=False, toolbar='right', title=ds_name).cols(3)
|
||||
return l.opts(shared_axes=False, toolbar='right', title=f"{ds_name} freq={d.df.index.freq.freqstr}").cols(3)
|
||||
|
||||
# +
|
||||
|
||||
|
||||
# -
|
||||
|
||||
# View train, test, val splits
|
||||
for dataset in datasets:
|
||||
ds_name = type(dataset).__name__
|
||||
d = dataset(datasets_root)
|
||||
print(d)
|
||||
display(plot_batches_y(d))
|
||||
|
||||
|
||||
# +
|
||||
def plot_batch_x(ds, i):
|
||||
"""Plot input features"""
|
||||
x_past, y_past, x_future, y_future = ds.get_rows(10)
|
||||
x_past, y_past, x_future, y_future = ds.get_rows(i)
|
||||
x = pd.concat([x_past, x_future])
|
||||
p = hv.NdOverlay({
|
||||
col: hv.Curve(x[col]) for col in x.columns
|
||||
@@ -154,15 +170,10 @@ def plot_batches_x(d):
|
||||
|
||||
# -
|
||||
|
||||
# View train, test, val splits
|
||||
ds_train, ds_val, ds_test = d.to_datasets(window_past=window_past,
|
||||
window_future=window_future)
|
||||
|
||||
# View input columns
|
||||
for dataset in datasets:
|
||||
d = dataset(datasets_root)
|
||||
display(plot_batches_x(d))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -87,6 +87,8 @@ class RegressionForecastData:
|
||||
|
||||
class GasSensor(RegressionForecastData):
|
||||
"""
|
||||
A metal oxide (MOX) gas sensor exposed for 3 weeks to mixtures of carbon monoxide and humid synthetic air in a gas chamber.
|
||||
|
||||
See: http://archive.ics.uci.edu/ml/datasets/Gas+sensor+array+temperature+modulation
|
||||
"""
|
||||
|
||||
@@ -120,6 +122,8 @@ class GasSensor(RegressionForecastData):
|
||||
|
||||
class MetroInterstateTraffic(RegressionForecastData):
|
||||
"""
|
||||
Hourly traffic volume for Interstate 94 (I-94) in the U.S. state of Minnesota. Includes weather and holiday features from 2012-2018.
|
||||
|
||||
See: https://archive.ics.uci.edu/ml/datasets/Metro+Interstate+Traffic+Volume
|
||||
"""
|
||||
|
||||
@@ -158,6 +162,8 @@ class MetroInterstateTraffic(RegressionForecastData):
|
||||
|
||||
class AppliancesEnergyPrediction(RegressionForecastData):
|
||||
"""
|
||||
Appliances energy use in a low energy building.
|
||||
|
||||
See: https://archive.ics.uci.edu/ml/datasets/Appliances+energy+prediction
|
||||
"""
|
||||
|
||||
@@ -193,7 +199,12 @@ class AppliancesEnergyPrediction(RegressionForecastData):
|
||||
|
||||
class BejingPM25(RegressionForecastData):
|
||||
"""
|
||||
See: http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
|
||||
PM2.5 data from the US Embassy in Beijing. This measures smoke as well as some pollen, fog, and dust particles of a certain size. Weather data from a nearby airport are included.
|
||||
|
||||
|
||||
See:
|
||||
- http://archive.ics.uci.edu/ml/datasets/Beijing+PM2.5+Data
|
||||
- https://en.wikipedia.org/wiki/Particulates
|
||||
"""
|
||||
|
||||
columns_target = ['log_pm2.5']
|
||||
@@ -305,11 +316,11 @@ def get_current_timeseries(
|
||||
class IMOSCurrentsVel(RegressionForecastData):
|
||||
"""
|
||||
|
||||
Current Speed at ANMN Two Rocks, WA, 204m mooring
|
||||
Current Speed at Two Rocks, Western Australia, with a water depth of 200 m. The mooring is located at Lat -31.719 Lon 115.03. Has tidal periods as features.
|
||||
|
||||
See:
|
||||
- https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/bbfc20d3-0e98-40a8-bd8a-3f7717eafb6d
|
||||
- http://thredds.aodn.org.au/thredds/fileServer/IMOS/ANMN/WA/WATR20/Velocity/
|
||||
from https://catalogue-imos.aodn.org.au/geonetwork/srv/api/records/ae86e2f5-eaaf-459e-a405-e654d85adb9c
|
||||
and http://thredds.aodn.org.au/thredds/catalog/IMOS/ANMN/WA/WATR20/Velocity/catalog.html
|
||||
And https://en.wikipedia.org/wiki/Theory_of_tides
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user