mirror of
https://github.com/wassname/attentive-neural-processes.git
synced 2026-06-27 16:44:27 +08:00
misc
This commit is contained in:
@@ -146,7 +146,7 @@ def is_val(f):
|
||||
return f2i(f) % 7==1
|
||||
|
||||
@cache.memoize()
|
||||
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=40, use_logy=False):
|
||||
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=60, use_logy=False):
|
||||
|
||||
df_weather = load_weather_csv(indir/'weather_hourly_darksky.csv')
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class PL_Seq2Seq(pl.LightningModule):
|
||||
|
||||
# tensorboard_logs_str = {k: f"{v}" for k, v in tensorboard_logs.items()}
|
||||
# print(f"step {self.trainer.global_step}, {outputs}")
|
||||
return {"val_loss": outputs["val_loss"], "train_loss": train_outputs.get("train_loss", None), "log": {**train_outputs["log"], **outputs["log"]}}
|
||||
return {"val_loss": outputs["agg_val_loss"], "train_loss": train_outputs.get("agg_train_loss", None), "log": {**train_outputs.get("log", {}), **outputs["log"]}}
|
||||
|
||||
|
||||
def show_image(self):
|
||||
@@ -82,8 +82,8 @@ class PL_Seq2Seq(pl.LightningModule):
|
||||
context_x, context_y, target_x, target_y = batch
|
||||
y_dist = extra['y_dist']
|
||||
|
||||
# For test use a diff loss, log_p over next <24h, so it's a standard amount of steps
|
||||
loss = -y_dist.log_prob(target_y)[:, :24].mean()
|
||||
# For test use a -logp only
|
||||
loss = -y_dist.log_prob(target_y).mean()
|
||||
tensorboard_logs = {"test_" + k: v for k, v in losses.items()}
|
||||
assert torch.isfinite(loss)
|
||||
return {"test_loss": loss, "log": tensorboard_logs}
|
||||
@@ -91,11 +91,10 @@ class PL_Seq2Seq(pl.LightningModule):
|
||||
def test_end(self, outputs):
|
||||
|
||||
outputs = agg_logs(outputs)
|
||||
|
||||
logger.info(
|
||||
f"step {self.trainer.global_step}, {outputs}"
|
||||
)
|
||||
return {"test_loss": outputs["test_loss"], "log": outputs["log"]}
|
||||
return {"test_loss": outputs["agg_test_loss"], "log": outputs["log"]}
|
||||
|
||||
def configure_optimizers(self):
|
||||
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
|
||||
@@ -153,6 +152,7 @@ class PL_Seq2Seq(pl.LightningModule):
|
||||
collate_fn=collate_fns(
|
||||
self.hparams["num_context"], self.hparams["num_extra_target"], sample=False, context_in_target=self.hparams["context_in_target"]
|
||||
),
|
||||
num_workers=self.hparams["num_workers"],
|
||||
)
|
||||
|
||||
@pl.data_loader
|
||||
@@ -172,4 +172,5 @@ class PL_Seq2Seq(pl.LightningModule):
|
||||
self.hparams["num_context"], self.hparams["num_extra_target"], sample=False, context_in_target=self.hparams["context_in_target"]
|
||||
),
|
||||
sampler=sampler,
|
||||
num_workers=self.hparams["num_workers"],
|
||||
)
|
||||
|
||||
@@ -25,8 +25,8 @@ class PL_NeuralProcess(PL_Seq2Seq):
|
||||
'det_enc_self_attn_type': 'uniform',
|
||||
'latent_enc_self_attn_type': 'uniform',
|
||||
'num_heads_power': 3,
|
||||
'hidden_dim_power': 5,
|
||||
'latent_dim_power': 4,
|
||||
'hidden_dim_power': 6,
|
||||
'latent_dim_power': 5,
|
||||
'n_decoder_layers': 4,
|
||||
'n_latent_encoder_layers': 2,
|
||||
'use_deterministic_path': False,
|
||||
@@ -149,7 +149,6 @@ class PL_ANP(PL_NeuralProcess):
|
||||
@staticmethod
|
||||
def add_suggest(trial, user_attrs={}):
|
||||
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
|
||||
trial.suggest_int("attention_layers", 1, 4)
|
||||
trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
|
||||
|
||||
trial.suggest_discrete_uniform(
|
||||
@@ -165,12 +164,6 @@ class PL_ANP(PL_NeuralProcess):
|
||||
trial.suggest_uniform("dropout", 0, 0.9)
|
||||
trial.suggest_uniform("attention_dropout", 0, 0.9)
|
||||
|
||||
trial.suggest_categorical(
|
||||
"latent_enc_self_attn_type", ['uniform', 'multihead']
|
||||
)
|
||||
trial.suggest_categorical("det_enc_self_attn_type", ['uniform', 'multihead'])
|
||||
trial.suggest_categorical("det_enc_cross_attn_type", ['uniform', 'multihead'])
|
||||
|
||||
trial.suggest_categorical("batchnorm", [False, True])
|
||||
trial.suggest_categorical("use_deterministic_path", [False, True])
|
||||
|
||||
@@ -205,7 +198,6 @@ class PL_ANPRNN(PL_NeuralProcess):
|
||||
@staticmethod
|
||||
def add_suggest(trial, user_attrs={}):
|
||||
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
|
||||
trial.suggest_int("attention_layers", 1, 4)
|
||||
trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
|
||||
|
||||
trial.suggest_discrete_uniform(
|
||||
|
||||
@@ -361,9 +361,9 @@ class NeuralProcess(nn.Module):
|
||||
|
||||
# Weight loss nearer to prediction time?
|
||||
weight = (torch.arange(loss_p.shape[1]) + 1).float().to(device)[None, :]
|
||||
loss_p_weighted = loss_p / torch.sqrt(weight) # We want to weight nearer stuff more
|
||||
loss_p_weighted = loss_p / torch.sqrt(weight) # We want to weight nearer stuff more
|
||||
|
||||
loss_p_weighted = loss_p_weighted.mean()
|
||||
|
||||
loss = (loss_kl - log_p).mean()
|
||||
loss_kl = loss_kl.mean()
|
||||
log_p = log_p.mean()
|
||||
@@ -377,4 +377,4 @@ class NeuralProcess(nn.Module):
|
||||
loss_p_weighted = None
|
||||
|
||||
y_pred = dist.rsample() if self.training else dist.loc
|
||||
return y_pred, dict(loss=loss, loss_p=loss_p, loss_kl=loss_kl, loss_mse=mse_loss, loss_p_weighted=loss_p_weighted), dict(log_sigma=log_sigma, dist=dist)
|
||||
return y_pred, dict(loss=loss, loss_p=loss_p, loss_kl=loss_kl, loss_mse=mse_loss, loss_p_weighted=loss_p_weighted), dict(log_sigma=log_sigma, y_dist=dist)
|
||||
|
||||
@@ -35,21 +35,6 @@ class NetTransformer(nn.Module):
|
||||
self.encoder = nn.TransformerEncoder(
|
||||
layer_enc, num_layers=self.hparams.nlayers, norm=encoder_norm
|
||||
)
|
||||
|
||||
# self.dec_norm = BatchNormSequence(self.hparams.x_dim)
|
||||
# self.dec_emb = nn.Linear(self.hparams.x_dim, hidden_out_size)
|
||||
# layer_dec = nn.TransformerDecoderLayer(
|
||||
# d_model=hidden_out_size,
|
||||
# dim_feedforward=self.hparams.hidden_size,
|
||||
# dropout=self.hparams.attention_dropout,
|
||||
# nhead=self.hparams.nhead,
|
||||
# )
|
||||
# decoder_norm = nn.LayerNorm(hidden_out_size)
|
||||
# self.decoder = nn.TransformerDecoder(
|
||||
# layer_dec,
|
||||
# num_layers=self.hparams.nlayers,
|
||||
# norm=decoder_norm
|
||||
# )
|
||||
self.mean = nn.Linear(hidden_out_size, self.hparams.y_dim)
|
||||
self.std = nn.Linear(hidden_out_size, self.hparams.y_dim)
|
||||
self._use_lvar = 0
|
||||
|
||||
@@ -38,6 +38,10 @@ def plot_rows(
|
||||
target_y_rows = np.exp(target_y_rows) - eps
|
||||
context_y_rows = np.exp(context_y_rows) - eps
|
||||
|
||||
# I don't want to show too much context
|
||||
context_y_rows = context_y_rows[-96:]
|
||||
x_context_rows = x_context_rows[-96:]
|
||||
|
||||
# Plot everything
|
||||
j = 0
|
||||
label = "energy(kWh/hh)"
|
||||
@@ -121,7 +125,7 @@ def plot_from_loader(
|
||||
y_pred, losses, extra = model(context_x, context_y, target_x, target_y)
|
||||
loss_test = losses["loss"] if "loss" in losses else 0.
|
||||
|
||||
y_std = extra["dist"].scale
|
||||
y_std = extra["y_dist"].scale
|
||||
|
||||
if plot:
|
||||
plt.figure()
|
||||
|
||||
@@ -81,7 +81,7 @@ def objective(trial, PL_MODEL_CLS, name, user_attrs):
|
||||
model.logger.experiment.add_hparams(trial.params, model.logger.metrics[-1])
|
||||
model.logger.save()
|
||||
|
||||
return model.logger.metrics[-1]["test_loss"]
|
||||
return model.logger.metrics[-1]["agg_test_loss"]
|
||||
|
||||
|
||||
def add_number(trial: optuna.Trial, model_dir: Path):
|
||||
|
||||
@@ -29,6 +29,7 @@ def agg_logs(outputs):
|
||||
{'val_loss': 0.7047,
|
||||
'log': {'val_loss': 0.7047, 'val_loss_p': 0.7047}},
|
||||
]
|
||||
-> {'agg_val_loss': 0.7126500010490417, 'log': {'agg_val_loss': 0.7126500010490417, 'agg_val_loss_p': 0.7126500010490417, 'agg_val_loss_kl': 2.6101499770447845e-06, 'agg_val_loss_mse': 0.17669999599456787}}
|
||||
|
||||
"""
|
||||
if isinstance(outputs, dict):
|
||||
@@ -41,7 +42,7 @@ def agg_logs(outputs):
|
||||
# Take mean of sub dicts
|
||||
keys = outputs[0][j].keys()
|
||||
aggs[j] = {
|
||||
k: torch.stack([x[j][k] for x in outputs if k in x[j]])
|
||||
'agg_'+k: torch.stack([x[j][k] for x in outputs if k in x[j]])
|
||||
.mean()
|
||||
.cpu()
|
||||
.item()
|
||||
@@ -49,7 +50,7 @@ def agg_logs(outputs):
|
||||
}
|
||||
else:
|
||||
# Take mean of numbers
|
||||
aggs[j] = (
|
||||
aggs['agg_'+j] = (
|
||||
torch.stack([x[j] for x in outputs if j in x]).mean().cpu().item()
|
||||
)
|
||||
return aggs
|
||||
|
||||
+1023
-815
File diff suppressed because one or more lines are too long
+6
-6
@@ -21,14 +21,14 @@ def test_agg_logs():
|
||||
]
|
||||
r = neural_processes.utils.agg_logs(outputs)
|
||||
assert isinstance(r, dict)
|
||||
assert 'val_loss' in r.keys()
|
||||
assert 'val_loss_kl' in r.keys()
|
||||
assert isinstance(r['val_loss'], float)
|
||||
assert 'agg_val_loss' in r.keys()
|
||||
assert 'agg_val_loss_kl' in r['log'].keys()
|
||||
assert isinstance(r['agg_val_loss'], float)
|
||||
|
||||
outputs = {'val_loss': torch.tensor(0.7206),
|
||||
'log': {'val_loss': torch.tensor(0.7206), 'val_loss_p': torch.tensor(0.7206), 'val_loss_kl': torch.tensor(2.3812e-06), 'val_loss_mse': torch.tensor(0.1838)}}
|
||||
r = neural_processes.utils.agg_logs(outputs)
|
||||
assert isinstance(r, dict)
|
||||
assert 'val_loss' in r.keys()
|
||||
assert 'val_loss_kl' in r.keys()
|
||||
assert isinstance(r['val_loss'], float)
|
||||
assert 'agg_val_loss' in r.keys()
|
||||
assert 'agg_val_loss_kl' in r['log'].keys()
|
||||
assert isinstance(r['agg_val_loss'], float)
|
||||
|
||||
Reference in New Issue
Block a user