mirror of
https://github.com/wassname/attentive-neural-processes.git
synced 2026-06-27 18:03:39 +08:00
update notebooks
This commit is contained in:
+146
-11660
File diff suppressed because one or more lines are too long
+278
-119
File diff suppressed because one or more lines are too long
+277
-5502
File diff suppressed because one or more lines are too long
+42
-239
File diff suppressed because one or more lines are too long
+250
-2091
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -23,7 +23,7 @@ def collate_fns(max_num_context, max_num_extra_target, sample, sort=True, contex
|
||||
x = torch.from_numpy(x).float()
|
||||
y = torch.from_numpy(y).float()
|
||||
|
||||
# Last feature will show how far in time a point is from out last context
|
||||
# Last feature will show how far in time a point is from our last context
|
||||
assert (np.diff(x[:, :, 0], 1)>=0).all(), 'first features should be ordered e.g. seconds'
|
||||
assert (x[:, max_num_context, -1]==0.).all(), 'last features should be empty'
|
||||
time = x[:, :, 0]
|
||||
|
||||
@@ -174,13 +174,13 @@ class LatentModelPL(pl.LightningModule):
|
||||
def add_suggest(trial):
|
||||
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
|
||||
|
||||
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(6)])
|
||||
trial.suggest_categorical("latent_dim", [8*2**i for i in range(6)])
|
||||
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(8)])
|
||||
trial.suggest_categorical("latent_dim", [8*2**i for i in range(8)])
|
||||
|
||||
trial.suggest_int("attention_layers", 1, 4)
|
||||
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 8])
|
||||
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 8])
|
||||
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 8])
|
||||
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 6, 8, 12])
|
||||
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 6, 8, 12])
|
||||
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 6, 8, 12])
|
||||
trial.suggest_int("num_heads", 8, 8)
|
||||
|
||||
trial.suggest_uniform("dropout", 0, 0.9)
|
||||
|
||||
@@ -22,6 +22,10 @@ from torchvision.transforms import ToTensor
|
||||
from src.data.smart_meter import get_smartmeter_df
|
||||
|
||||
from src.utils import ObjectDict
|
||||
from torch.utils.data._utils.collate import default_collate
|
||||
|
||||
def collate_fn(batch, sample=None):
|
||||
return default_collate(batch)
|
||||
|
||||
def log_prob_sigma(value, loc, log_scale):
|
||||
"""A slightly more stable (not confirmed yet) log prob taking in log_var instead of scale.
|
||||
@@ -220,6 +224,7 @@ class LSTM_PL(pl.LightningModule):
|
||||
dset_train,
|
||||
batch_size=self.hparams.batch_size,
|
||||
shuffle=True,
|
||||
collate_fn=collate_fn,
|
||||
num_workers=self.hparams.num_workers,
|
||||
)
|
||||
|
||||
@@ -231,9 +236,10 @@ class LSTM_PL(pl.LightningModule):
|
||||
self.hparams,
|
||||
label_names=["energy(kWh/hh)"],
|
||||
train=False,
|
||||
|
||||
transforms=transforms.ToTensor(),
|
||||
)
|
||||
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
|
||||
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False,collate_fn=collate_fn,)
|
||||
|
||||
@pl.data_loader
|
||||
def test_dataloader(self):
|
||||
@@ -243,9 +249,10 @@ class LSTM_PL(pl.LightningModule):
|
||||
self.hparams,
|
||||
label_names=["energy(kWh/hh)"],
|
||||
train=False,
|
||||
|
||||
transforms=transforms.ToTensor(),
|
||||
)
|
||||
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
|
||||
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False, collate_fn=collate_fn,)
|
||||
|
||||
@staticmethod
|
||||
def add_suggest(trial):
|
||||
|
||||
@@ -44,7 +44,9 @@ class TransformerSeq2SeqNet(nn.Module):
|
||||
self._min_std = _min_std
|
||||
|
||||
hidden_out_size = self.hparams.hidden_out_size
|
||||
self.enc_norm = BatchNormSequence(self.hparams.input_size)
|
||||
self.enc_emb = nn.Linear(self.hparams.input_size, hidden_out_size)
|
||||
encoder_norm = nn.LayerNorm(hidden_out_size)
|
||||
layer_enc = nn.TransformerEncoderLayer(
|
||||
d_model=hidden_out_size,
|
||||
dim_feedforward=self.hparams.hidden_size,
|
||||
@@ -55,9 +57,11 @@ class TransformerSeq2SeqNet(nn.Module):
|
||||
self.encoder = nn.TransformerEncoder(
|
||||
layer_enc,
|
||||
num_layers=self.hparams.nlayers,
|
||||
norm=encoder_norm
|
||||
)
|
||||
|
||||
|
||||
self.dec_norm = BatchNormSequence(self.hparams.input_size_decoder)
|
||||
self.dec_emb = nn.Linear(self.hparams.input_size_decoder, hidden_out_size)
|
||||
layer_dec = nn.TransformerDecoderLayer(
|
||||
d_model=hidden_out_size,
|
||||
@@ -65,26 +69,38 @@ class TransformerSeq2SeqNet(nn.Module):
|
||||
dropout=self.hparams.attention_dropout,
|
||||
nhead=self.hparams.nhead,
|
||||
)
|
||||
decoder_norm = nn.LayerNorm(hidden_out_size)
|
||||
self.decoder = nn.TransformerDecoder(
|
||||
layer_dec,
|
||||
num_layers=self.hparams.nlayers,
|
||||
norm=decoder_norm
|
||||
)
|
||||
self.mean = nn.Linear(hidden_out_size, self.hparams.output_size)
|
||||
self.std = nn.Linear(hidden_out_size, self.hparams.output_size)
|
||||
|
||||
# self._reset_parameters()
|
||||
|
||||
|
||||
def _reset_parameters(self):
|
||||
r"""Initiate parameters in the transformer model."""
|
||||
|
||||
for p in self.parameters():
|
||||
if p.dim() > 1:
|
||||
torch.nn.init.xavier_uniform_(p)
|
||||
|
||||
def forward(self, context_x, context_y, target_x, target_y=None):
|
||||
x = torch.cat([context_x, context_y], -1)
|
||||
# Size([B, C, input_dim])
|
||||
x = self.enc_emb(x)
|
||||
x = self.enc_emb(self.enc_norm(x))
|
||||
# Size([B, C, emb_dim])
|
||||
memory = self.encoder(x)
|
||||
# Size([B, C, emb_dim])
|
||||
target_x = self.dec_emb(target_x)
|
||||
target_x = self.dec_emb(self.dec_norm(target_x))
|
||||
# Size([B, T, input_target_dim]) -> Size([B, T, emb_dim])
|
||||
|
||||
# In transformers the memory and target_x need to be the same length. Let's use a permutation-invariant aggregation over the context
|
||||
# Then expand it, so it's available as we decode, conditional on target_x
|
||||
memory = memory.mean(dim=1, keepdim=True).expand_as(target_x)
|
||||
memory = memory.max(dim=1, keepdim=True)[0].expand_as(target_x)
|
||||
|
||||
outputs = self.decoder(target_x, memory)
|
||||
# Size([B, T, emb_dim])
|
||||
@@ -266,7 +282,7 @@ class TransformerSeq2Seq_PL(pl.LightningModule):
|
||||
trial.suggest_uniform("attention_dropout", 0, 0.75)
|
||||
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
|
||||
trial.suggest_categorical("hidden_out_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
|
||||
trial.suggest_categorical("nlayers", [1, 2, 4, 8])
|
||||
trial.suggest_categorical("nlayers", [1, 2, 4, 6, 8, 16, 32])
|
||||
trial.suggest_categorical("nhead", [1, 2, 8, 16])
|
||||
|
||||
trial._user_attrs = {
|
||||
|
||||
+17
-7
@@ -94,15 +94,19 @@ def run_trial(
|
||||
model, trainer = main(
|
||||
trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
|
||||
)
|
||||
trainer.fit(model)
|
||||
try:
|
||||
trainer.fit(model)
|
||||
except KeyboardInterrupt:
|
||||
print('KeyboardInterrupt, skipping rest of training')
|
||||
pass
|
||||
|
||||
# Load checkpoint
|
||||
checkpoint = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))[-1]
|
||||
device = next(model.parameters()).device
|
||||
print(f"Loading checkpoint {checkpoint}")
|
||||
model = model.load_from_checkpoint(checkpoint).to(device)
|
||||
|
||||
trainer.test(model)
|
||||
checkpoints = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))
|
||||
if len(checkpoints):
|
||||
checkpoint = checkpoints[-1]
|
||||
device = next(model.parameters()).device
|
||||
print(f"Loading checkpoint {checkpoint}")
|
||||
model = model.load_from_checkpoint(checkpoint).to(device)
|
||||
|
||||
# Plot
|
||||
loader = model.val_dataloader()
|
||||
@@ -111,4 +115,10 @@ def run_trial(
|
||||
plot_from_loader(model.val_dataloader(), model, i=670, title='val 670')
|
||||
plot_from_loader(model.train_dataloader(), model, i=670, title='train 670')
|
||||
plot_from_loader(model.test_dataloader(), model, i=670, title='test 670')
|
||||
|
||||
try:
|
||||
trainer.test(model)
|
||||
except KeyboardInterrupt:
|
||||
print('KeyboardInterrupt, skipping rest of testing')
|
||||
pass
|
||||
return trial, trainer, model
|
||||
|
||||
Reference in New Issue
Block a user