diff --git a/neural_processes/data/smart_meter.py b/neural_processes/data/smart_meter.py index 54ee2fe..2783a24 100644 --- a/neural_processes/data/smart_meter.py +++ b/neural_processes/data/smart_meter.py @@ -193,8 +193,8 @@ def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=60, test_files = [f for f in csv_files if is_test(f)] val_files = [f for f in csv_files if is_val(f) and (not is_test(f))] train_files = [f for f in csv_files if (not is_val(f)) and (not is_test(f))] - print(len(train_files), len(val_files), len(test_files)) - print(train_files, val_files, test_files) + # print(len(train_files), len(val_files), len(test_files)) + # print(train_files, val_files, test_files) assert not set(train_files).intersection(set(test_files), set(val_files)) assert not set(test_files).intersection(set(val_files)) diff --git a/neural_processes/lightning.py b/neural_processes/lightning.py index ee4a940..76207dd 100644 --- a/neural_processes/lightning.py +++ b/neural_processes/lightning.py @@ -57,7 +57,7 @@ class PL_Seq2Seq(pl.LightningModule): train_outputs = agg_logs(self.train_logs) self.train_logs = [] - print(f"step val {self.trainer.global_step}, {outputs} {train_outputs}") + logger.info(f"step val {self.trainer.global_step}, {outputs} {train_outputs}") # tensorboard_logs_str = {k: f"{v}" for k, v in tensorboard_logs.items()} # print(f"step {self.trainer.global_step}, {outputs}") diff --git a/neural_processes/models/lstm_seqseq.py b/neural_processes/models/lstm_seqseq.py index 48b7005..5d396ec 100644 --- a/neural_processes/models/lstm_seqseq.py +++ b/neural_processes/models/lstm_seqseq.py @@ -38,7 +38,6 @@ class Seq2SeqNet(nn.Module): def __init__(self, hparams, _min_std=0.05): super().__init__() hparams = hparams_power(hparams) - print(hparams) self.hparams = hparams self._min_std = _min_std @@ -107,7 +106,7 @@ class Seq2SeqNet(nn.Module): if output is not None: num_layers = h_out.shape[1] - print(cell.max(), h_out.max(), h.max()) + # print(cell.max(), h_out.max(), h.max()) h_out += h.mean(1).repeat(1, num_layers, 1) cell += h.max(1).repeat(1, num_layers, 1) diff --git a/neural_processes/models/neural_process/lightning.py b/neural_processes/models/neural_process/lightning.py index 327a1ed..b7e6f33 100644 --- a/neural_processes/models/neural_process/lightning.py +++ b/neural_processes/models/neural_process/lightning.py @@ -17,15 +17,15 @@ class PL_NeuralProcess(PL_Seq2Seq): DEFAULT_ARGS = { 'dropout': 0.1, - 'learning_rate': 0.004, + 'learning_rate': 0.003, 'attention_dropout': 0.5, 'batchnorm': False, 'attention_layers': 2, 'det_enc_cross_attn_type': 'uniform', 'det_enc_self_attn_type': 'uniform', 'latent_enc_self_attn_type': 'uniform', - 'num_heads_power': 3, - 'hidden_dim_power': 3, + 'num_heads_power': 2, + 'hidden_dim_power': 6, 'latent_dim_power': 5, 'n_latent_encoder_layers': 3, 'n_det_encoder_layers': 3, diff --git a/neural_processes/models/neural_process/model.py b/neural_processes/models/neural_process/model.py index d5605af..ae83378 100644 --- a/neural_processes/models/neural_process/model.py +++ b/neural_processes/models/neural_process/model.py @@ -309,11 +309,11 @@ class NeuralProcess(nn.Module): device = next(self.parameters()).device - if self.hparams.get('bnorm_inputs', True): - # https://stackoverflow.com/a/46772183/221742 - target_x = self.norm_x(target_x) - context_x = self.norm_x(context_x) - context_y = self.norm_y(context_y) + # if self.hparams.get('bnorm_inputs', True): + # https://stackoverflow.com/a/46772183/221742 + target_x = self.norm_x(target_x) + context_x = self.norm_x(context_x) + context_y = self.norm_y(context_y) if self._use_rnn: # see https://arxiv.org/abs/1910.09323 where x is substituted with h = RNN(x) diff --git a/neural_processes/modules/attention.py b/neural_processes/modules/attention.py index 7f60446..f62cfa7 100644 --- a/neural_processes/modules/attention.py +++ b/neural_processes/modules/attention.py @@ -63,6 +63,8 @@ class Attention(nn.Module): dropout=dropout, batchnorm=batchnorm, ) + elif self._rep == "lstm": + self._lstm = LSTMBlock(x_dim, hidden_dim, batchnorm=batchnorm, dropout=dropout, num_layers=attention_layers) if attention_type == "uniform": self._attention_func = self._uniform_attention @@ -95,6 +97,9 @@ class Attention(nn.Module): if self._rep == "mlp": k = self.batch_mlp_k(k) q = self.batch_mlp_q(q) + elif self._rep == "lstm": + k = self.batch_lstm(k) + q = self.batch_lstm(q) rep = self._attention_func(k, v, q) return rep diff --git a/neural_processes/train.py b/neural_processes/train.py index d53716e..cf748c9 100644 --- a/neural_processes/train.py +++ b/neural_processes/train.py @@ -42,6 +42,7 @@ def main( val_percent_check=PERCENT_TEST_EXAMPLES, checkpoint_callback=checkpoint_callback, max_epochs=hparams["max_nb_epochs"], + weights_summary='top', gpus=-1 if torch.cuda.is_available() else None, early_stop_callback=PyTorchLightningPruningCallback(trial, monitor="val_loss") if prune @@ -62,7 +63,7 @@ def objective(trial, PL_MODEL_CLS, name, user_attrs): trial = PL_MODEL_CLS.add_suggest(trial) [trial.set_user_attr(k, v) for k, v in user_attrs.items()] - print(dict(number=trial.number, params=trial.params, user_attrs=trial.user_attrs)) + logger.debug(dict(number=trial.number, params=trial.params, user_attrs=trial.user_attrs)) model, trainer = main(trial, PL_MODEL_CLS=PL_MODEL_CLS, name=name) @@ -71,24 +72,24 @@ def objective(trial, PL_MODEL_CLS, name, user_attrs): if len(checkpoints): checkpoint = checkpoints[-1] device = next(model.parameters()).device - print(f"Loading checkpoint {checkpoint}") + logger.info(f"Loading checkpoint {checkpoint}") model = model.load_from_checkpoint(checkpoint).to(device) trainer.test(model) # also report to tensorboard & print - print("logger.metrics", model.logger.metrics[-1:]) + logger.info("logger.metrics", model.logger.metrics[-1:]) model.logger.experiment.add_hparams(trial.params, model.logger.metrics[-1]) model.logger.save() - return model.logger.metrics[-1]["agg_test_loss"] + return model.logger.metrics[-1]["agg_test_score"] def add_number(trial: optuna.Trial, model_dir: Path): # For manual experiment we will start at -1 and deincr by 1 versions = [int(s.stem.split("_")[-1]) for s in model_dir.glob("version_*")] + [-1] trial.number = min(versions) - 1 - print("trial.number", trial.number) + # logger.debug("trial.number", trial.number) return trial @@ -101,7 +102,7 @@ def run_trial( plot_from_loader=plot_from_loader, number=None, ): - print(f"now run `tensorboard --logdir {MODEL_DIR}`") + logger.info(f"now run `tensorboard --logdir {MODEL_DIR}`") (MODEL_DIR / name).mkdir(parents=True, exist_ok=True) if getattr(PL_MODEL_CLS, 'DEFAULT_ARGS', None): @@ -121,7 +122,7 @@ def run_trial( # Add user attributes [trial.set_user_attr(k, v) for k, v in user_attrs.items()] - print('trial', trial.number, trial, trial.params, trial.user_attrs) + logger.info('trial number=%s trial=%s params=%s attrs=%s', trial.number, trial, trial.params, trial.user_attrs) model, trainer = main( trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False @@ -132,7 +133,7 @@ def run_trial( try: trainer.fit(model) except KeyboardInterrupt: - print('KeyboardInterrupt, skipping rest of training') + logger.warning('KeyboardInterrupt, skipping rest of training') pass # Plot @@ -151,7 +152,7 @@ def run_trial( if len(checkpoints): checkpoint = checkpoints[-1] device = next(model.parameters()).device - print(f"Loading checkpoint {checkpoint}") + logger.info(f"Loading checkpoint {checkpoint}") model = model.load_from_checkpoint(checkpoint).to(device) # Plot @@ -162,11 +163,11 @@ def run_trial( plot_from_loader(model.test_dataloader(), model, i=670, title='test 670') plt.show() else: - print('no checkpoints') + logger.warning('no checkpoints') try: trainer.test(model) except KeyboardInterrupt: - print('KeyboardInterrupt, skipping rest of testing') + logger.warning('KeyboardInterrupt, skipping rest of testing') pass return trial, trainer, model