This commit is contained in:
wassname
2020-11-03 06:15:07 +08:00
parent 94c06cf913
commit 7da4a895fc
3 changed files with 15952 additions and 812 deletions
File diff suppressed because one or more lines are too long
@@ -376,15 +376,15 @@ def free_mem():
# + # +
# PARAMS: model # PARAMS: model
## Some datasets are easier, so we will vary the hidden size to predict overfitting ## Some datasets are easier, so we will vary the hidden size to predict overfitting
hidden_size={'IMOSCurrentsVel': 6, #? hidden_size={'IMOSCurrentsVel': 8, #?
'AppliancesEnergyPrediction': 6, # ? 'AppliancesEnergyPrediction': 8, # ?
'BejingPM25': 8, # OK 'BejingPM25': 8, # OK
'GasSensor': 8, # OK 'GasSensor': 8, # OK
'MetroInterstateTraffic': 16 # OK 'MetroInterstateTraffic': 16 # OK
} }
dropout=0.0 dropout=0.0
layers=6 layers=6
nhead=2 nhead=4
models = [ models = [
# lambda xs, ys: BaselineLast(), # lambda xs, ys: BaselineLast(),
@@ -392,7 +392,7 @@ models = [
lambda xs, ys, hidden_size: Transformer(xs, lambda xs, ys, hidden_size: Transformer(xs,
ys, ys,
attention_dropout=dropout, attention_dropout=dropout,
nhead=nhead*2, nhead=nhead,
nlayers=layers, nlayers=layers,
hidden_size=hidden_size), hidden_size=hidden_size),
@@ -403,7 +403,7 @@ models = [
lambda xs, ys, hidden_size:TCNSeq(xs, ys, hidden_size=hidden_size, nlayers=layers, dropout=dropout, kernel_size=2), lambda xs, ys, hidden_size:TCNSeq(xs, ys, hidden_size=hidden_size, nlayers=layers, dropout=dropout, kernel_size=2),
lambda xs, ys, hidden_size: RANP(xs, lambda xs, ys, hidden_size: RANP(xs,
ys, hidden_dim=hidden_size, dropout=dropout, ys, hidden_dim=hidden_size, dropout=dropout,
latent_dim=hidden_size//2, n_decoder_layers=layers), latent_dim=hidden_size//2, n_decoder_layers=layers, n_latent_encoder_layers=layers, n_det_encoder_layers=layers),
lambda xs, ys, hidden_size: TransformerSeq2Seq(xs, lambda xs, ys, hidden_size: TransformerSeq2Seq(xs,
ys, ys,
hidden_size=hidden_size, hidden_size=hidden_size,
@@ -419,7 +419,7 @@ models = [
lambda xs, ys, hidden_size: LSTMSeq2Seq(xs, lambda xs, ys, hidden_size: LSTMSeq2Seq(xs,
ys, ys,
hidden_size=hidden_size, hidden_size=hidden_size,
lstm_layers=layers, lstm_layers=layers//2,
lstm_dropout=dropout), lstm_dropout=dropout),
lambda xs, ys, hidden_size: CrossAttention(xs, lambda xs, ys, hidden_size: CrossAttention(xs,
ys, ys,
@@ -480,6 +480,49 @@ max_iters=20000
tensorboard_dir = Path(f"../outputs/{timestamp}").resolve() tensorboard_dir = Path(f"../outputs/{timestamp}").resolve()
print(f'For tensorboard run:\ntensorboard --logdir="{tensorboard_dir}"') print(f'For tensorboard run:\ntensorboard --logdir="{tensorboard_dir}"')
# +
# DEBUG: sanity check
for Dataset in datasets:
dataset_name = Dataset.__name__
dataset = Dataset(datasets_root)
ds_train, ds_val, ds_test = dataset.to_datasets(window_past=window_past,
window_future=window_future)
# Init data
x_past, y_past, x_future, y_future = ds_train.get_rows(10)
xs = x_past.shape[-1]
ys = y_future.shape[-1]
# Loaders
dl_train = DataLoader(ds_train,
batch_size=batch_size,
shuffle=True,
pin_memory=num_workers == 0,
num_workers=num_workers)
dl_val = DataLoader(ds_val,
shuffle=True,
batch_size=batch_size,
num_workers=num_workers)
for m_fn in models:
free_mem()
pt_model = m_fn(xs, ys, hidden_size[dataset_name])
model_name = type(pt_model).__name__
print(timestamp, dataset_name, model_name)
# Wrap in lightning
model = PL_MODEL(pt_model,
lr=3e-4
).to(device)
trainer = pl.Trainer(
fast_dev_run=True,
# GPU
gpus=1,
amp_level='O1',
precision=16,
)
# + # +
results = defaultdict(dict) results = defaultdict(dict)
@@ -630,7 +673,7 @@ for dataset in ds_predss.keys():
n += p.opts(title=dataset, legend_position='top_left') n += p.opts(title=dataset, legend_position='top_left')
n.cols(1).opts(shared_axes=False) n.cols(1).opts(shared_axes=False)
1/0
dataset='IMOSCurrentsVel' dataset='IMOSCurrentsVel'
data_i=844 data_i=844
@@ -646,26 +689,7 @@ n.cols(1)
# + # +
# plot_performance(ds_preds, full=True) # 1/0
# +
def plot_at_i(time_i, dataset, model):
d = ds_predss[dataset][model].isel(t_source=time_i)
return hv_plot_prediction(d).relabel(label=f"{model}")
dmap = hv.DynamicMap(plot_at_i, kdims=['t_source', 'dataset', 'model'])
t = ds_preds.t_source.values
models = list(next(iter(ds_predss.values())).keys())
dmap = dmap.redim.values(
t_source=range(len(t)),
dataset=list(ds_predss.keys()),
model=models,
)
dmap.opts(framewise=True)
# -
1/0
# + # +
# Explore predictions with dynamic map # Explore predictions with dynamic map
@@ -696,36 +720,6 @@ dmap
1/0 1/0
# +
# Explore predictions with dynamic map
def plot_predictions_ahead(dataset='IMOSCurrentsVel', model='', t_ahead_i=6, start=0, window_steps=1800):
d = next(iter(ds_predss[dataset].values())).isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
p = hv.Scatter({
'x': d.t_target,
'y': d.y_true
}, label='true').opts(color='black', framewise=True)
ds_preds = ds_predss[dataset][model]
d = ds_preds.isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
x = d.t_target
y = d.y_pred
s = d.y_pred_std
p *= hv.Curve({'x': x, 'y':y}, label=model).relabel(label=f"{model}")
p *= hv.Spread((x, y, s * 2),
label='2*std').opts(alpha=0.5, line_width=0)
p = p.opts(title=f"Dataset: {dataset}, model={model}, {d.freq}*{t_ahead_i} ahead", height=250, legend_position='top', ylabel=d.targets)
return p.opts(framewise=True)
dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 'model', 't_ahead_i', 'start', 'window_steps'])
dmap = dmap.redim.values(dataset=list(ds_predss.keys()), model=models)
dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
dmap = dmap.redim.default(t_ahead_i=10, window_steps=1000)
dmap
# -
# + # +
@@ -752,8 +746,54 @@ def plot_predictions_ahead(dataset='IMOSCurrentsVel', t_ahead_i=6, start=0, wind
dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 't_ahead_i', 'start', 'window_steps']) dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 't_ahead_i', 'start', 'window_steps'])
dmap = dmap.redim.values(dataset=list(ds_predss.keys())) dmap = dmap.redim.values(dataset=list(ds_predss.keys()))
dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000)) dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
dmap = dmap.redim.default(t_ahead_i=10, window_steps=400) dmap = dmap.redim.default(t_ahead_i=10, window_steps=400, dataset='IMOSCurrentsVel')
dmap dmap
# +
# def plot_at_i(time_i, dataset, model):
# d = ds_predss[dataset][model].isel(t_source=time_i)
# return hv_plot_prediction(d).relabel(label=f"{model}")
# dmap = hv.DynamicMap(plot_at_i, kdims=['t_source', 'dataset', 'model'])
# t = ds_preds.t_source.values
# models = list(next(iter(ds_predss.values())).keys())
# dmap = dmap.redim.values(
# t_source=range(len(t)),
# dataset=list(ds_predss.keys()),
# model=models,
# )
# dmap.opts(framewise=True)
# +
# plot_performance(ds_preds, full=True)
# +
# # Explore predictions with dynamic map
# def plot_predictions_ahead(dataset='IMOSCurrentsVel', model='', t_ahead_i=6, start=0, window_steps=1800):
# d = next(iter(ds_predss[dataset].values())).isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
# p = hv.Scatter({
# 'x': d.t_target,
# 'y': d.y_true
# }, label='true').opts(color='black', framewise=True)
# ds_preds = ds_predss[dataset][model]
# d = ds_preds.isel(t_ahead=t_ahead_i).isel(t_source=slice(start, start+window_steps))
# x = d.t_target
# y = d.y_pred
# s = d.y_pred_std
# p *= hv.Curve({'x': x, 'y':y}, label=model).relabel(label=f"{model}")
# p *= hv.Spread((x, y, s * 2),
# label='2*std').opts(alpha=0.5, line_width=0)
# p = p.opts(title=f"Dataset: {dataset}, model={model}, {d.freq}*{t_ahead_i} ahead", height=250, legend_position='top', ylabel=d.targets)
# return p.opts(framewise=True)
# dmap = hv.DynamicMap(plot_predictions_ahead, kdims=['dataset', 'model', 't_ahead_i', 'start', 'window_steps'])
# dmap = dmap.redim.values(dataset=list(ds_predss.keys()), model=models)
# dmap = dmap.redim.range(t_ahead_i=(0, window_future), start=(0, 5000), window_steps=(10, 5000))
# dmap = dmap.redim.default(t_ahead_i=10, window_steps=1000)
# dmap
# - # -
+8
View File
@@ -162,6 +162,7 @@ class LatentEncoder(nn.Module):
min_std=0.01, min_std=0.01,
batchnorm=False, batchnorm=False,
dropout=0, dropout=0,
nhead=8,
attention_dropout=0, attention_dropout=0,
attention_layers=2, attention_layers=2,
): ):
@@ -178,6 +179,7 @@ class LatentEncoder(nn.Module):
self._self_attention = Attention( self._self_attention = Attention(
hidden_dim, hidden_dim,
attention_layers, attention_layers,
n_heads=nhead,
rep="identity", rep="identity",
dropout=attention_dropout, dropout=attention_dropout,
) )
@@ -218,6 +220,7 @@ class DeterministicEncoder(nn.Module):
attention_layers=2, attention_layers=2,
batchnorm=False, batchnorm=False,
dropout=0, dropout=0,
nhead=8,
attention_dropout=0, attention_dropout=0,
): ):
super().__init__() super().__init__()
@@ -232,12 +235,14 @@ class DeterministicEncoder(nn.Module):
self._self_attention = Attention( self._self_attention = Attention(
hidden_dim, hidden_dim,
attention_layers, attention_layers,
n_heads=nhead,
rep="identity", rep="identity",
dropout=attention_dropout, dropout=attention_dropout,
) )
self._cross_attention = Attention( self._cross_attention = Attention(
hidden_dim, hidden_dim,
x_dim=x_dim, x_dim=x_dim,
n_heads=nhead,
attention_layers=attention_layers, attention_layers=attention_layers,
) )
@@ -325,6 +330,7 @@ class RANP(nn.Module):
use_deterministic_path=True, use_deterministic_path=True,
min_std=0.01, # To avoid collapse use a minimum standard deviation, should be much smaller than variation in labels min_std=0.01, # To avoid collapse use a minimum standard deviation, should be much smaller than variation in labels
dropout=0, dropout=0,
nhead=8,
attention_dropout=0, attention_dropout=0,
batchnorm=False, batchnorm=False,
attention_layers=2, attention_layers=2,
@@ -353,6 +359,7 @@ class RANP(nn.Module):
n_encoder_layers=n_latent_encoder_layers, n_encoder_layers=n_latent_encoder_layers,
attention_layers=attention_layers, attention_layers=attention_layers,
dropout=dropout, dropout=dropout,
nhead=nhead,
attention_dropout=attention_dropout, attention_dropout=attention_dropout,
batchnorm=batchnorm, batchnorm=batchnorm,
min_std=min_std, min_std=min_std,
@@ -365,6 +372,7 @@ class RANP(nn.Module):
n_d_encoder_layers=n_det_encoder_layers, n_d_encoder_layers=n_det_encoder_layers,
attention_layers=attention_layers, attention_layers=attention_layers,
dropout=dropout, dropout=dropout,
nhead=nhead,
batchnorm=batchnorm, batchnorm=batchnorm,
attention_dropout=attention_dropout, attention_dropout=attention_dropout,
) )