misc

2026-06-27 16:44:27 +08:00 · 2020-04-11 15:37:55 +08:00
parent 4957e04bdc
commit 0a393b4e02
11 changed files with 606 additions and 329 deletions
@@ -8,7 +8,11 @@ from torch.nn import functional as F
 from torch.utils.data import DataLoader
 from torchvision.datasets import MNIST
 from test_tube import Experiment, HyperOptArgumentParser
-from neural_processes.data.smart_meter import collate_fns, SmartMeterDataSet, get_smartmeter_df
+from neural_processes.data.smart_meter import (
+    collate_fns,
+    SmartMeterDataSet,
+    get_smartmeter_df,
+)
 import torchvision.transforms as transforms
 from neural_processes.plot import plot_from_loader_to_tensor, plot_from_loader
 from argparse import ArgumentParser
@@ -20,20 +24,23 @@ import torch
 import io
 import PIL
 from torchvision.transforms import ToTensor
+
 from neural_processes.modules import BatchNormSequence
 from neural_processes.data.smart_meter import get_smartmeter_df

 from neural_processes.utils import ObjectDict
 from neural_processes.lightning import PL_Seq2Seq
+from ..logger import logger
+from ..utils import hparams_power


 class Seq2SeqNet(nn.Module):
-    def __init__(self, hparams, _min_std = 0.05):
+    def __init__(self, hparams, _min_std=0.05):
        super().__init__()
+        hparams = hparams_power(hparams)
        self.hparams = hparams
        self._min_std = _min_std

-
        self.norm_input = BatchNormSequence(self.hparams.input_size)
        self.encoder = nn.LSTM(
            input_size=self.hparams.input_size,
@@ -43,7 +50,9 @@ class Seq2SeqNet(nn.Module):
            bidirectional=self.hparams.bidirectional,
            dropout=self.hparams.lstm_dropout,
        )
-        self.multihead_attn = nn.MultiheadAttention(self.hparams.hidden_size, num_heads=8)
+        self.multihead_attn = nn.MultiheadAttention(
+            self.hparams.hidden_size, num_heads=8
+        )

        self.norm_target = BatchNormSequence(self.hparams.input_size_decoder)
        self.decoder = nn.LSTM(
@@ -54,9 +63,8 @@ class Seq2SeqNet(nn.Module):
            bidirectional=self.hparams.bidirectional,
            dropout=self.hparams.lstm_dropout,
        )
-        self.hidden_out_size = (
-            self.hparams.hidden_size
-            * (self.hparams.bidirectional + 1)
+        self.hidden_out_size = self.hparams.hidden_size * (
+            self.hparams.bidirectional + 1
        )
        self.mean = nn.Linear(self.hidden_out_size, self.hparams.output_size)
        self.std = nn.Linear(self.hidden_out_size, self.hparams.output_size)
@@ -76,74 +84,86 @@ class Seq2SeqNet(nn.Module):
        # context_x, d_encoded, target_x = k, v, q

        # query, key, value = target_x, context_x, d_encoded
-        attn_output, _ = self.multihead_attn(h_out.permute(1, 0, 2), h_out.permute(1, 0, 2), h_out.permute(1, 0, 2))
+        attn_output, _ = self.multihead_attn(
+            h_out.permute(1, 0, 2), h_out.permute(1, 0, 2), h_out.permute(1, 0, 2)
+        )
        h_out = attn_output.permute(1, 0, 2).contiguous()
-        attn_output, _ = self.multihead_attn(cell.permute(1, 0, 2), cell.permute(1, 0, 2), cell.permute(1, 0, 2))
+        attn_output, _ = self.multihead_attn(
+            cell.permute(1, 0, 2), cell.permute(1, 0, 2), cell.permute(1, 0, 2)
+        )
        cell = attn_output.permute(1, 0, 2).contiguous()

        outputs, (_, _) = self.decoder(target_x, (h_out, cell))
        # output = [batch size, seq len, hid dim * n directions]
-        
+
        # outputs: [B, T, num_direction * H]
        mean = self.mean(outputs)
        log_sigma = self.std(outputs)
        if self._use_lvar:
-            log_sigma = torch.clamp(log_sigma, math.log(self._min_std), -math.log(self._min_std))
+            log_sigma = torch.clamp(
+                log_sigma, math.log(self._min_std), -math.log(self._min_std)
+            )
            sigma = torch.exp(log_sigma)
        else:
            sigma = self._min_std + (1 - self._min_std) * F.softplus(log_sigma)
-        y_dist=torch.distributions.Normal(mean, sigma)
-        
+        y_dist = torch.distributions.Normal(mean, sigma)
+
        # Loss
        loss_mse = loss_p = None
        if target_y is not None:
-            loss_mse = F.mse_loss(mean, target_y, reduction='none')
+            loss_mse = F.mse_loss(mean, target_y, reduction="none")
            if self._use_lvar:
                loss_p = -log_prob_sigma(target_y, mean, log_sigma)
            else:
                loss_p = -y_dist.log_prob(target_y).mean(-1)
-            
+
            if self.hparams["context_in_target"]:
-                loss_p[:context_x.size(1)] /= 100
-                loss_mse[:context_x.size(1)] /= 100
+                loss_p[: context_x.size(1)] /= 100
+                loss_mse[: context_x.size(1)] /= 100
            # # Don't catch loss on context window
            # mean = mean[:, self.hparams.num_context:]
            # log_sigma = log_sigma[:, self.hparams.num_context:]

        y_pred = y_dist.rsample if self.training else y_dist.loc
-        return y_pred, dict(loss_p=loss_p.mean(), loss_mse=loss_mse.mean()), dict(log_sigma=log_sigma, dist=y_dist)
+        return (
+            y_pred,
+            dict(loss_p=loss_p.mean(), loss_mse=loss_mse.mean()),
+            dict(log_sigma=log_sigma, dist=y_dist),
+        )


 class LSTMSeq2Seq_PL(PL_Seq2Seq):
-    def __init__(self, hparams,
-        MODEL_CLS=Seq2SeqNet, **kwargs):
-        super().__init__(hparams,
-        MODEL_CLS=MODEL_CLS, **kwargs)
+    def __init__(self, hparams, MODEL_CLS=Seq2SeqNet, **kwargs):
+        super().__init__(hparams, MODEL_CLS=MODEL_CLS, **kwargs)
+
+    DEFAULT_ARGS = {
+        "agg": "mean",
+        "lstm_dropout": 0.22,
+        "hidden_size_power": 4.0,
+        "learning_rate": 0.001,
+        "lstm_layers": 4,
+        'bidirectional': False
+    }

-    DEFAULT_ARGS = {'agg': 'mean', 'lstm_dropout': 0.12013231612195126, 'hidden_out_size_power': 4.0, 'hidden_size_power': 7.0, 'learning_rate': 0.0022924639229335475, 'nhead_power': 2.0, 'nlayers_power': 4.0}
-    
    @staticmethod
    def add_suggest(trial):
-        # TODO make label name configurable
-        # TODO make data source configurable
        trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
        trial.suggest_uniform("lstm_dropout", 0, 0.75)
-        trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])    
-        trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])    
-        trial.suggest_categorical("bidirectional", [False, True])    
-        
+        trial.suggest_discrete_uniform("hidden_size_power", 3, 9, 1)
+        trial.suggest_int("lstm_layers", 1, 8)
+        trial.suggest_categorical("bidirectional", [False, True])

        trial._user_attrs = {
-                'batch_size': 16,
-                'grad_clip': 40,
-                'max_nb_epochs': 200,
-                'num_workers': 4,
-                'num_extra_target': 24*4,
-                'vis_i': '670',
-                'num_context': 24*4,
-                'input_size': 18,
-                'input_size_decoder': 17,
-                'context_in_target': True,
-                'output_size': 1
+            "batch_size": 16,
+            "grad_clip": 40,
+            "max_nb_epochs": 200,
+            "num_workers": 4,
+            "num_extra_target": 24 * 4,
+            "vis_i": "670",
+            "num_context": 24 * 4,
+            "input_size": 18,
+            "input_size_decoder": 17,
+            "context_in_target": False,
+            "output_size": 1,
        }
        return trial
@@ -24,15 +24,14 @@ from neural_processes.data.smart_meter import get_smartmeter_df
 from neural_processes.utils import ObjectDict
 from ..lightning import PL_Seq2Seq
 from torch.utils.data._utils.collate import default_collate
-
-def collate_fn(batch, sample=None):
-    return default_collate(batch)
+from ..logger import logger
+from ..utils import hparams_power


 class LSTMNet(nn.Module):
-
-    def __init__(self, hparams, _min_std = 0.05):
+    def __init__(self, hparams, _min_std=0.05):
        super().__init__()
+        hparams = hparams_power(hparams)
        self.hparams = hparams
        self._min_std = _min_std

@@ -44,9 +43,8 @@ class LSTMNet(nn.Module):
            bidirectional=self.hparams.bidirectional,
            dropout=self.hparams.lstm_dropout,
        )
-        self.hidden_out_size = (
-            self.hparams.hidden_size
-            * (self.hparams.bidirectional + 1)
+        self.hidden_out_size = self.hparams.hidden_size * (
+            self.hparams.bidirectional + 1
        )
        self.mean = nn.Linear(self.hidden_out_size, 1)
        self.std = nn.Linear(self.hidden_out_size, 1)
@@ -62,7 +60,12 @@ class LSTMNet(nn.Module):

        loss = None
        if target_y is not None:
-            loss = F.mse_loss(y_pred * loss_scale, y[:, -steps:, :] * loss_scale, reduction='none') / loss_scale
+            loss = (
+                F.mse_loss(
+                    y_pred * loss_scale, y[:, -steps:, :] * loss_scale, reduction="none"
+                )
+                / loss_scale
+            )

            assert torch.isfinite(loss)

@@ -70,35 +73,39 @@ class LSTMNet(nn.Module):


 class LSTM_PL_STD(PL_Seq2Seq):
-    def __init__(self, hparams,
-        MODEL_CLS=LSTMNet, **kwargs):
-        super().__init__(hparams,
-        MODEL_CLS=MODEL_CLS, **kwargs)
+    def __init__(self, hparams, MODEL_CLS=LSTMNet, **kwargs):
+        super().__init__(hparams, MODEL_CLS=MODEL_CLS, **kwargs)

-    DEFAULT_ARGS = {'bidirectional': False, 'hidden_size_power': 4, 'learning_rate': 0.0010825329363784934, 'lstm_dropout': 0.3905792111699782, 'lstm_layers': 4}
+    # Defaults; LSTMNet runs hparams_power, so hidden_size becomes 2 ** 4 = 16.
+    DEFAULT_ARGS = {
+        "bidirectional": False,
+        "hidden_size_power": 4,
+        "learning_rate": 0.001,
+        "lstm_dropout": 0.39,
+        "lstm_layers": 4,
+    }

    @staticmethod
    def add_suggest(trial):
        trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
-        trial.suggest_uniform("lstm_dropout", 0, 0.75)
-        trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512])    
-        trial.suggest_categorical("lstm_layers", [1, 2, 4, 8])
+        trial.suggest_uniform("lstm_dropout", 0, 0.85)
+        trial.suggest_discrete_uniform("hidden_size_power", 3, 9, 1)
+        trial.suggest_int("lstm_layers", 1, 8)
        trial.suggest_categorical("bidirectional", [False, True])
-        
+
        # constants
        trial._user_attrs = {
-            'batch_size': 16,
-            'grad_clip': 40,
-            'max_nb_epochs': 200,
-            'num_workers': 4,
-            'num_extra_target': 24*4,
-            'vis_i': '670',
-            'num_context': 24*4,
-            'input_size': 18,
-            'input_size_decoder': 17,
-            'context_in_target': True,
-            'output_size': 1,
-            'patience': 3,
+            "batch_size": 16,
+            "grad_clip": 40,
+            "max_nb_epochs": 200,
+            "num_workers": 4,
+            "num_extra_target": 24 * 4,
+            "vis_i": "670",
+            "num_context": 24 * 4,
+            "input_size": 18,
+            "input_size_decoder": 17,
+            "context_in_target": False,
+            "output_size": 1,
+            "patience": 3,
        }
        return trial
-
@@ -21,14 +21,14 @@ class PL_NeuralProcess(PL_Seq2Seq):
        'det_enc_cross_attn_type': 'multihead',
        'det_enc_self_attn_type': 'uniform',
        'dropout': 0,
-        'hidden_dim': 128,
-        'latent_dim': 128,
+        'hidden_dim_power': 7,
+        'latent_dim_power': 7,
        'latent_enc_self_attn_type': 'uniform',
        'learning_rate': 0.002,
        'n_decoder_layers': 4,
        'n_det_encoder_layers': 4,
-        'n_latent_encoder_layers': 2,
-        'num_heads': 8,
+        'n_latent_encoder_layers_power': 1,
+        'num_heads_power': 3,
        'use_deterministic_path': True,
        'use_lvar': True,
        'use_self_attn': True,
@@ -37,16 +37,23 @@ class PL_NeuralProcess(PL_Seq2Seq):

    @staticmethod
    def add_suggest(trial):        
-        trial.suggest_loguniform("learning_rate", 1e-5, 1e-2)
-
-        trial.suggest_categorical("hidden_dim", [8*2**i for i in range(8)])
-        trial.suggest_categorical("latent_dim", [8*2**i for i in range(8)])
-        
+        trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
        trial.suggest_int("attention_layers", 1, 4)
-        trial.suggest_categorical("n_latent_encoder_layers", [1, 2, 4, 6, 8, 12])
-        trial.suggest_categorical("n_det_encoder_layers", [1, 2, 4, 6, 8, 12])
-        trial.suggest_categorical("n_decoder_layers", [1, 2, 4, 6, 8, 12])
-        trial.suggest_int("num_heads", 8, 8)
+        trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
+
+        trial.suggest_discrete_uniform(
+            "hidden_dim_power", 3, 11, 1
+        )
+        trial.suggest_discrete_uniform(
+            "latent_dim_power", 3, 11, 1
+        )
+        trial.suggest_int(
+            "n_latent_encoder_layers", 1, 11
+        )
+
+        trial.suggest_int("n_latent_encoder_layers", 1, 12)
+        trial.suggest_int("n_det_encoder_layers", 1, 12)
+        trial.suggest_int("n_decoder_layers", 1, 12)

        trial.suggest_uniform("dropout", 0, 0.9)
        trial.suggest_uniform("attention_dropout", 0, 0.9)
@@ -72,7 +79,7 @@ class PL_NeuralProcess(PL_Seq2Seq):
            'vis_i': '670',
            'num_extra_target': 24*4,
            'x_dim': 18,
-            'context_in_target': True,
+            'context_in_target': False,
            'y_dim': 1,
            'patience': 3,
            'min_std': 0.005,
@@ -6,7 +6,7 @@ import math

 from neural_processes.modules import BatchNormSequence, BatchMLP, Attention, LSTMBlock
 from neural_processes.utils import kl_loss_var, log_prob_sigma
-
+from neural_processes.utils import hparams_power

 class LatentEncoder(nn.Module):
    def __init__(
@@ -195,6 +195,7 @@ class NeuralProcess(nn.Module):

    @staticmethod
    def FROM_HPARAMS(hparams):
+        hparams = hparams_power(hparams)
        return NeuralProcess(**hparams)
    
    def __init__(self,
@@ -9,15 +9,15 @@ from torch.nn import functional as F
 from torch.utils.data import DataLoader

 from neural_processes.lightning import PL_Seq2Seq
+from ..logger import logger

 class NetTransformer(nn.Module):
    def __init__(self, hparams):
        super().__init__()
-        hparams["nlayers"] = int(2 ** hparams["nlayers_power"])
-        hparams["hidden_size"] = int(2**hparams["hidden_size_power"])
-        hparams["hidden_out_size"] = int(2 ** hparams["hidden_out_size_power"])
-        hparams["nhead"] = int(2 ** hparams["nhead_power"])
-        logger.debug(f"{type(self)} hparams {hparams}")
+        for k in hparams.keys():
+            if k.endswith("_power"):
+                k_new = k.replace("_power", "")
+                hparams[k_new] = int(2 ** hparams[k])
        self.hparams = hparams

        hidden_out_size = self.hparams.hidden_out_size
@@ -148,7 +148,7 @@ class PL_Transformer(PL_Seq2Seq):
        )
        trial.suggest_discrete_uniform("hidden_out_size_power", 2, 9, 1)
        trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
-        trial.suggest_discrete_uniform("nlayers_power", 1, 5, 1)      
+        trial.suggest_int("nlayers_power", 1, 12)      

        user_attrs_default = {
            "batch_size": 16,
@@ -159,6 +159,9 @@ class PL_Transformer(PL_Seq2Seq):
            "input_size": 6,
            "output_size": 1,
            "label_steps": 24,
+            "nan_value": -99.9,
+            'context_in_target': False,
+            'patience': 3,
        }
        [trial.set_user_attr(k, v) for k, v in user_attrs_default.items()]
        [trial.set_user_attr(k, v) for k, v in user_attrs.items()]
@@ -27,10 +27,13 @@ from neural_processes.modules import BatchNormSequence

 from neural_processes.utils import ObjectDict
 from neural_processes.lightning import PL_Seq2Seq
+from ..logger import logger
+from ..utils import hparams_power

 class TransformerSeq2SeqNet(nn.Module):
    def __init__(self, hparams, _min_std = 0.05):
        super().__init__()
+        hparams = hparams_power(hparams)
        self.hparams = hparams
        self._min_std = _min_std

@@ -134,7 +137,7 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
        super().__init__(hparams,
        MODEL_CLS=MODEL_CLS, **kwargs)
    
-    DEFAULT_ARGS = {'agg': 'mean', 'attention_dropout': 0.12013231612195126, 'hidden_out_size_power': 4.0, 'hidden_size_power': 7.0, 'learning_rate': 0.0022924639229335475, 'nhead_power': 2.0, 'nlayers_power': 4.0}
+    DEFAULT_ARGS = {'agg': 'mean', 'attention_dropout': 0.12, 'hidden_out_size_power': 4, 'hidden_size_power': 7, 'learning_rate': 0.0023, 'nhead_power': 2, 'nlayers_power': 4}

    @staticmethod
    def add_suggest(trial: optuna.Trial):
@@ -155,10 +158,12 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
        """
        trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
        trial.suggest_uniform("attention_dropout", 0, 0.75)
-        trial.suggest_categorical("hidden_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])    
-        trial.suggest_categorical("hidden_out_size", [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048])    
-        trial.suggest_categorical("nlayers", [1, 2, 4, 6, 8, 16, 32])
-        trial.suggest_categorical("nhead", [1, 2, 8, 16])
+        trial.suggest_discrete_uniform(
+            "hidden_size_power", 2, 10, 1
+        ) 
+        trial.suggest_discrete_uniform("hidden_out_size_power", 2, 9, 1) 
+        trial.suggest_discrete_uniform("nhead_power", 1, 4, 1)
+        trial.suggest_int("nlayers", 1, 12)

        trial._user_attrs = {
            'batch_size': 16,
@@ -170,7 +175,7 @@ class TransformerSeq2Seq_PL(PL_Seq2Seq):
            'num_context': 24*4,
            'input_size': 18,
            'input_size_decoder': 17,
-            'context_in_target': True,
+            'context_in_target': False,
            'output_size': 1,
            'patience': 3,
        }
@@ -106,7 +106,7 @@ def run_trial(

    # Add user attributes
    trial._user_attrs.update(user_attrs)
-    print('trial', trial)
+    print('trial', trial, trial.params, trial.user_attrs)

    model, trainer = main(
        trial, PL_MODEL_CLS, name=name, MODEL_DIR=MODEL_DIR, train=False, prune=False
@@ -55,28 +55,63 @@ class PyTorchLightningPruningCallback(EarlyStopping):
            raise optuna.exceptions.TrialPruned(message)


+# class ObjectDict(dict):
+#     """
+#     Interface similar to an argparser
+#     """
+
+#     def __init__(self):
+#         pass
+
+#     def __setattr__(self, attr, value):
+#         self[attr] = value
+#         return self[attr]
+
+#     def __getattr__(self, attr):
+#         if attr.startswith("_"):
+#             # https://stackoverflow.com/questions/10364332/how-to-pickle-python-object-derived-from-dict
+#             raise AttributeError
+#         try:
+#             return super().__getitem__(attr)
+#         except KeyError:
+#             # cPickle expects __getattr__ to raise AttributeError, not KeyError.
+#             raise AttributeError(self._KeyErrorString(name))
+
+#     @property
+#     def __dict__(self):
+#         return dict(self)
+
 class ObjectDict(dict):
     """
-    Interface similar to an argparser
+    Dict whose items are also readable, writable and deletable as attributes;
+    an easy way to represent (hyper)parameters.
+
+    A missing name raises AttributeError (not KeyError) on attribute access,
+    so ``hasattr``, ``getattr(obj, name, default)`` and ``copy.deepcopy``
+    behave as they do for ordinary objects.
+
+    https://stackoverflow.com/a/50613966/221742
     """
+    __setattr__ = dict.__setitem__

-    def __init__(self):
-        pass
+    def __getattr__(self, attr):
+        # Only reached when normal attribute lookup fails. The attribute
+        # protocol expects AttributeError, so translate the KeyError.
+        try:
+            return self[attr]
+        except KeyError:
+            raise AttributeError(attr) from None

-    def __setattr__(self, attr, value):
-        self[attr] = value
-        return self[attr]
+    def __delattr__(self, attr):
+        try:
+            del self[attr]
+        except KeyError:
+            raise AttributeError(attr) from None

-    def __getattr__(self, attr):
-        if attr.startswith("_"):
-            # https://stackoverflow.com/questions/10364332/how-to-pickle-python-object-derived-from-dict
-            raise AttributeError
-        return dict(self)[attr]
+    def __getstate__(self):
+        # All state lives in the dict items, so the mapping itself is the state.
+        return self
+
+    def __setstate__(self, state):
+        self.update(state)

-    @property
-    def __dict__(self):
-        return dict(self)
+    def copy(self, **extra_params):
+        """Return a new ObjectDict with ``extra_params`` added or overridden."""
+        new = ObjectDict(self)
+        new.update(extra_params)
+        return new

+def hparams_power(hparams):
+    """Expand every ``<name>_power`` entry into ``<name> = int(2 ** value)``.
+
+    Mutates and returns ``hparams``; only the trailing ``_power`` is stripped.
+    """
+    suffix = "_power"
+    # Snapshot the keys first: new entries are inserted while looping.
+    for k in list(hparams):
+        if k.endswith(suffix):
+            hparams[k[: -len(suffix)]] = int(2 ** hparams[k])
+    return hparams

 def log_prob_sigma(value, loc, log_scale):
    """A slightly more stable (not confirmed yet) log prob taking in log_var instead of scale.
@@ -1,3 +1,7 @@
+# local package
+-e .
+
+# external requirements
 torch>=1.3.0
 tqdm
 pandas
@@ -0,0 +1,10 @@
+from setuptools import find_packages, setup
+
+setup(
+    name="neural_processes",
+    version="0.1.0",
+    description="Attentive Neural Processes",
+    author="wassname",
+    license="Apachev2",
+    packages=find_packages(),  # package list is discovered, not hand-written
+)