mirror of
https://github.com/wassname/attentive-neural-processes.git
synced 2026-06-27 16:44:27 +08:00
multiple blocks
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
*.csv filter=lfs diff=lfs merge=lfs -text
|
||||
*.tsv filter=lfs diff=lfs merge=lfs -text
|
||||
@@ -5,6 +5,7 @@ events.out.*
|
||||
/optuna_result/
|
||||
/runs/
|
||||
/logs/
|
||||
.cache/
|
||||
|
||||
# Created by https://www.gitignore.io/api/code,linux,macos,python,windows,jupyternotebook,jupyternotebooks
|
||||
# Edit at https://www.gitignore.io/?templates=code,linux,macos,python,windows,jupyternotebook,jupyternotebooks
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,11 @@ from pathlib import Path
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import torch
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
from diskcache import Cache
|
||||
|
||||
cache = Cache(".cache")
|
||||
|
||||
def npsample_batch(x, y, size=None, sort=True):
|
||||
"""Sample from numpy arrays along 2nd dim."""
|
||||
@@ -76,7 +81,7 @@ class SmartMeterDataSet(torch.utils.data.Dataset):
|
||||
rows = rows.sort_values('tstp')
|
||||
|
||||
# make sure tstp, which is our x axis, is the first value
|
||||
columns = ['tstp'] + list(set(rows.columns) - set(['tstp'])) + ['future']
|
||||
columns = ['tstp'] + list(set(rows.columns) - set(['tstp', 'block'])) + ['future']
|
||||
rows['future'] = 0.
|
||||
rows = rows[columns]
|
||||
|
||||
@@ -94,21 +99,14 @@ class SmartMeterDataSet(torch.utils.data.Dataset):
|
||||
def __len__(self):
|
||||
return len(self.df) - (self.num_context + self.num_extra_target)
|
||||
|
||||
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), use_logy=False):
|
||||
csv_files = sorted(Path('data/smart-meters-in-london/halfhourly_dataset').glob('*.csv'))[:1]
|
||||
df = pd.concat([pd.read_csv(f, parse_dates=[1], na_values=['Null']) for f in csv_files])
|
||||
# print(df.info())
|
||||
|
||||
df = df.groupby('tstp').mean()
|
||||
df['tstp'] = df.index
|
||||
df.index.name = ''
|
||||
|
||||
def load_weather_csv(infile):
|
||||
|
||||
# Load weather data
|
||||
df_weather = pd.read_csv(indir/'weather_hourly_darksky.csv', parse_dates=[3])
|
||||
|
||||
df_weather = pd.read_csv(infile, parse_dates=[3])
|
||||
use_cols = ['visibility', 'windBearing', 'temperature', 'time', 'dewPoint',
|
||||
'pressure', 'apparentTemperature', 'windSpeed',
|
||||
'humidity']
|
||||
'pressure', 'apparentTemperature', 'windSpeed',
|
||||
'humidity']
|
||||
df_weather = df_weather[use_cols].set_index('time')
|
||||
|
||||
# Resample to match energy data
|
||||
@@ -116,56 +114,91 @@ def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), use_logy=Fals
|
||||
|
||||
# Normalise
|
||||
weather_norms=dict(mean={'visibility': 11.2,
|
||||
'windBearing': 195.7,
|
||||
'temperature': 10.5,
|
||||
'dewPoint': 6.5,
|
||||
'pressure': 1014.1,
|
||||
'apparentTemperature': 9.2,
|
||||
'windSpeed': 3.9,
|
||||
'humidity': 0.8},
|
||||
'windBearing': 195.7,
|
||||
'temperature': 10.5,
|
||||
'dewPoint': 6.5,
|
||||
'pressure': 1014.1,
|
||||
'apparentTemperature': 9.2,
|
||||
'windSpeed': 3.9,
|
||||
'humidity': 0.8},
|
||||
std={'visibility': 3.1,
|
||||
'windBearing': 90.6,
|
||||
'temperature': 5.8,
|
||||
'dewPoint': 5.0,
|
||||
'pressure': 11.4,
|
||||
'apparentTemperature': 6.9,
|
||||
'windSpeed': 2.0,
|
||||
'humidity': 0.1})
|
||||
'windBearing': 90.6,
|
||||
'temperature': 5.8,
|
||||
'dewPoint': 5.0,
|
||||
'pressure': 11.4,
|
||||
'apparentTemperature': 6.9,
|
||||
'windSpeed': 2.0,
|
||||
'humidity': 0.1})
|
||||
|
||||
for col in df_weather.columns:
|
||||
df_weather[col] -= weather_norms['mean'][col]
|
||||
df_weather[col] /= weather_norms['std'][col]
|
||||
return df_weather
|
||||
|
||||
df = pd.concat([df, df_weather], 1).dropna()
|
||||
|
||||
def f2i(f: Path) -> int:
|
||||
"""block_2.csv->2"""
|
||||
return int(f.stem.split('_')[-1])
|
||||
|
||||
def is_test(f):
|
||||
return f2i(f) % 8 == 1
|
||||
|
||||
def is_val(f):
|
||||
return f2i(f) % 7==1
|
||||
|
||||
@cache.memoize()
|
||||
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=10, use_logy=False):
|
||||
|
||||
df_weather = load_weather_csv(indir/'weather_hourly_darksky.csv')
|
||||
|
||||
# Also find bank holidays
|
||||
df_hols = pd.read_csv(indir/'uk_bank_holidays.csv', parse_dates=[0])
|
||||
holidays = set(df_hols['Bank holidays'].dt.round('D'))
|
||||
|
||||
df['holiday'] = df.tstp.apply(lambda dt:dt.floor('D') in holidays).astype(int)
|
||||
def load_csv(f):
|
||||
df = pd.read_csv(f, parse_dates=[1], na_values=['Null'])
|
||||
|
||||
# Add time features
|
||||
time = df.tstp
|
||||
df["month"] = time.dt.month / 12.0
|
||||
df['day'] = time.dt.day / 310.0
|
||||
df['week'] = time.dt.week / 52.0
|
||||
df['hour'] = time.dt.hour / 24.0
|
||||
df['minute'] = time.dt.minute / 24.0
|
||||
df['dayofweek'] = time.dt.dayofweek / 7.0
|
||||
# Do a whole block as one series
|
||||
df = df.groupby('tstp').mean()
|
||||
df = df.sort_values('tstp')
|
||||
|
||||
# Drop nan and 0's
|
||||
df = df[df['energy(kWh/hh)'] != 0]
|
||||
df = df.dropna()
|
||||
df['block'] = f2i(f)
|
||||
|
||||
if use_logy:
|
||||
df['energy(kWh/hh)'] = np.log(df['energy(kWh/hh)']+1e-4)
|
||||
df = df.sort_values('tstp')
|
||||
# Drop nan and 0's
|
||||
df = df[df['energy(kWh/hh)'] != 0]
|
||||
df = df.dropna()
|
||||
# df.index.name = 'tstp'
|
||||
df['tstp'] = df.index
|
||||
|
||||
# join weather and holidays
|
||||
df = pd.concat([df, df_weather], 1).dropna()
|
||||
df['holiday'] = df.tstp.apply(lambda dt: dt.floor('D') in holidays).astype(int)
|
||||
|
||||
# Add time features
|
||||
time = df.tstp
|
||||
df["month"] = time.dt.month / 12.0
|
||||
df['day'] = time.dt.day / 310.0
|
||||
df['week'] = time.dt.week / 52.0
|
||||
df['hour'] = time.dt.hour / 24.0
|
||||
df['minute'] = time.dt.minute / 24.0
|
||||
df['dayofweek'] = time.dt.dayofweek / 7.0
|
||||
|
||||
if use_logy:
|
||||
df['energy(kWh/hh)'] = np.log(df['energy(kWh/hh)']+1e-4)
|
||||
return df
|
||||
|
||||
# split data
|
||||
test_split= -int(len(df) * 0.1)
|
||||
val_split= int(len(df) * 0.15)
|
||||
df_test = df[:val_split]
|
||||
df_train = df[val_split:test_split]
|
||||
df_val = df[test_split:]
|
||||
csv_files = list((indir / 'halfhourly_dataset').glob('*.csv'))
|
||||
csv_files.sort(key=f2i)
|
||||
csv_files = csv_files[:max_files]
|
||||
|
||||
test_files = [f for f in csv_files if is_test(f)]
|
||||
val_files = [f for f in csv_files if is_val(f) and (not is_test(f))]
|
||||
train_files = [f for f in csv_files if (not is_val(f)) and (not is_test(f))]
|
||||
print(len(train_files), len(val_files), len(test_files))
|
||||
print(train_files, val_files, test_files)
|
||||
assert not set(train_files).intersection(set(test_files), set(val_files))
|
||||
assert not set(test_files).intersection(set(val_files))
|
||||
|
||||
df_test = pd.concat([load_csv(f) for f in tqdm(test_files, desc='test csv')], 0)
|
||||
df_val = pd.concat([load_csv(f) for f in tqdm(val_files, desc='val csv')], 0)
|
||||
df_train = pd.concat([load_csv(f) for f in tqdm(train_files, desc='train csv')], 0)
|
||||
return df_train, df_val, df_test
|
||||
|
||||
@@ -114,7 +114,8 @@ def run_trial(
|
||||
model, trainer = main(
|
||||
trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
|
||||
)
|
||||
if number is None:
|
||||
checkpoints = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))
|
||||
if len(checkpoints)==0 or number is None:
|
||||
try:
|
||||
trainer.fit(model)
|
||||
except KeyboardInterrupt:
|
||||
@@ -147,6 +148,8 @@ def run_trial(
|
||||
plt.show()
|
||||
plot_from_loader(model.test_dataloader(), model, i=670, title='test 670')
|
||||
plt.show()
|
||||
else:
|
||||
print('no checkpoints')
|
||||
|
||||
try:
|
||||
trainer.test(model)
|
||||
|
||||
+509
-4222
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user