update notebooks

This commit is contained in:
wassname
2020-03-15 15:50:35 +08:00
parent 62c377b05f
commit d50a823a5e
11 changed files with 1231 additions and 20063 deletions
+146 -11660
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+277 -5502
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+250 -2091
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1 -1
View File
@@ -23,7 +23,7 @@ def collate_fns(max_num_context, max_num_extra_target, sample, sort=True, contex
x = torch.from_numpy(x).float()
y = torch.from_numpy(y).float()
# Last feature will show how far in time a point is from out last context
# Last feature will show how far in time a point is from our last context
assert (np.diff(x[:, :, 0], 1)>=0).all(), 'first features should be ordered e.g. seconds'
assert (x[:, max_num_context, -1]==0.).all(), 'last features should be empty'
time = x[:, :, 0]
+5 -5
View File
@@ -174,13 +174,13 @@ class LatentModelPL(pl.LightningModule):
def add_suggest(trial):
trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(6)])
trial.suggest_categorical("latent_dim", [8*2**i for i in range(6)])
trial.suggest_categorical("hidden_dim", [8*2**i for i in range(8)])
trial.suggest_categorical("latent_dim", [8*2**i for i in range(8)])
trial.suggest_int("attention_layers", 1, 4)
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 8])
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 8])
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 8])
trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 6, 8, 12])
trial.suggest_int("num_heads", 8, 8)
trial.suggest_uniform("dropout", 0, 0.9)
+9 -2
View File
@@ -22,6 +22,10 @@ from torchvision.transforms import ToTensor
from src.data.smart_meter import get_smartmeter_df
from src.utils import ObjectDict
from torch.utils.data._utils.collate import default_collate
def collate_fn(batch, sample=None):
    """Collate a list of samples into a batch with PyTorch's default collation.

    Args:
        batch: The samples to combine; passed unchanged to
            ``default_collate``.
        sample: Unused by this function.
            NOTE(review): presumably kept so the signature lines up with
            other collate helpers in the project -- confirm with callers.

    Returns:
        Whatever ``default_collate(batch)`` returns for the given samples.
    """
    return default_collate(batch)
def log_prob_sigma(value, loc, log_scale):
"""A slightly more stable (not confirmed yet) log prob taking in log_var instead of scale.
@@ -220,6 +224,7 @@ class LSTM_PL(pl.LightningModule):
dset_train,
batch_size=self.hparams.batch_size,
shuffle=True,
collate_fn=collate_fn,
num_workers=self.hparams.num_workers,
)
@@ -231,9 +236,10 @@ class LSTM_PL(pl.LightningModule):
self.hparams,
label_names=["energy(kWh/hh)"],
train=False,
transforms=transforms.ToTensor(),
)
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False,collate_fn=collate_fn,)
@pl.data_loader
def test_dataloader(self):
@@ -243,9 +249,10 @@ class LSTM_PL(pl.LightningModule):
self.hparams,
label_names=["energy(kWh/hh)"],
train=False,
transforms=transforms.ToTensor(),
)
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False)
return DataLoader(dset_test, batch_size=self.hparams.batch_size, shuffle=False, collate_fn=collate_fn,)
@staticmethod
def add_suggest(trial):
+20 -4
View File
@@ -44,7 +44,9 @@ class TransformerSeq2SeqNet(nn.Module):
self._min_std = _min_std
hidden_out_size = self.hparams.hidden_out_size
self.enc_norm = BatchNormSequence(self.hparams.input_size)
self.enc_emb = nn.Linear(self.hparams.input_size, hidden_out_size)
encoder_norm = nn.LayerNorm(hidden_out_size)
layer_enc = nn.TransformerEncoderLayer(
d_model=hidden_out_size,
dim_feedforward=self.hparams.hidden_size,
@@ -55,9 +57,11 @@ class TransformerSeq2SeqNet(nn.Module):
self.encoder = nn.TransformerEncoder(
layer_enc,
num_layers=self.hparams.nlayers,
norm=encoder_norm
)
self.dec_norm = BatchNormSequence(self.hparams.input_size_decoder)
self.dec_emb = nn.Linear(self.hparams.input_size_decoder, hidden_out_size)
layer_dec = nn.TransformerDecoderLayer(
d_model=hidden_out_size,
@@ -65,26 +69,38 @@ class TransformerSeq2SeqNet(nn.Module):
dropout=self.hparams.attention_dropout,
nhead=self.hparams.nhead,
)
decoder_norm = nn.LayerNorm(hidden_out_size)
self.decoder = nn.TransformerDecoder(
layer_dec,
num_layers=self.hparams.nlayers,
norm=decoder_norm
)
self.mean = nn.Linear(hidden_out_size, self.hparams.output_size)
self.std = nn.Linear(hidden_out_size, self.hparams.output_size)
# self._reset_parameters()
def _reset_parameters(self):
r"""Initiate parameters in the transformer model."""
for p in self.parameters():
if p.dim() > 1:
torch.nn.init.xavier_uniform_(p)
def forward(self, context_x, context_y, target_x, target_y=None):
x = torch.cat([context_x, context_y], -1)
# Size([B, C, input_dim])
x = self.enc_emb(x)
x = self.enc_emb(self.enc_norm(x))
# Size([B, C, emb_dim])
memory = self.encoder(x)
# Size([B, C, emb_dim])
target_x = self.dec_emb(target_x)
target_x = self.dec_emb(self.dec_norm(target_x))
# Size([B, T, input_target_dim]) -> Size([B, T, emb_dim])
# In transformers the memory and target_x need to be the same length. Let's use a permutation-invariant aggregation on the context,
# then expand it so it's available as we decode, conditional on target_x
memory = memory.mean(dim=1, keepdim=True).expand_as(target_x)
memory = memory.max(dim=1, keepdim=True)[0].expand_as(target_x)
outputs = self.decoder(target_x, memory)
# Size([B, T, emb_dim])
@@ -266,7 +282,7 @@ class TransformerSeq2Seq_PL(pl.LightningModule):
trial.suggest_uniform("attention_dropout", 0, 0.75)
trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("hidden_out_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])
trial.suggest_categorical("nlayers", [1, 2, 4, 8])
trial.suggest_categorical("nlayers", [1, 2, 4, 6, 8, 16, 32])
trial.suggest_categorical("nhead", [1, 2, 8, 16])
trial._user_attrs = {
+17 -7
View File
@@ -94,15 +94,19 @@ def run_trial(
model, trainer = main(
trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
)
trainer.fit(model)
try:
trainer.fit(model)
except KeyboardInterrupt:
print('KeyboardInterrupt, skipping rest of training')
pass
# Load checkpoint
checkpoint = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))[-1]
device = next(model.parameters()).device
print(f"Loading checkpoint {checkpoint}")
model = model.load_from_checkpoint(checkpoint).to(device)
trainer.test(model)
checkpoints = sorted(Path(trainer.checkpoint_callback.dirpath).glob("*.ckpt"))
if len(checkpoints):
checkpoint = checkpoints[-1]
device = next(model.parameters()).device
print(f"Loading checkpoint {checkpoint}")
model = model.load_from_checkpoint(checkpoint).to(device)
# Plot
loader = model.val_dataloader()
@@ -111,4 +115,10 @@ def run_trial(
plot_from_loader(model.val_dataloader(), model, i=670, title='val 670')
plot_from_loader(model.train_dataloader(), model, i=670, title='train 670')
plot_from_loader(model.test_dataloader(), model, i=670, title='test 670')
try:
trainer.test(model)
except KeyboardInterrupt:
print('KeyboardInterrupt, skipping rest of testing')
pass
return trial, trainer, model