This commit is contained in:
wassname
2020-04-20 07:10:08 +08:00
parent acfb574db5
commit b37bf7f7ac
10 changed files with 1050 additions and 859 deletions
+1 -1
View File
@@ -146,7 +146,7 @@ def is_val(f):
return f2i(f) % 7==1
@cache.memoize()
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=40, use_logy=False):
def get_smartmeter_df(indir=Path('./data/smart-meters-in-london'), max_files=60, use_logy=False):
df_weather = load_weather_csv(indir/'weather_hourly_darksky.csv')
+6 -5
View File
@@ -61,7 +61,7 @@ class PL_Seq2Seq(pl.LightningModule):
# tensorboard_logs_str = {k: f"{v}" for k, v in tensorboard_logs.items()}
# print(f"step {self.trainer.global_step}, {outputs}")
return {"val_loss": outputs["val_loss"], "train_loss": train_outputs.get("train_loss", None), "log": {**train_outputs["log"], **outputs["log"]}}
return {"val_loss": outputs["agg_val_loss"], "train_loss": train_outputs.get("agg_train_loss", None), "log": {**train_outputs.get("log", {}), **outputs["log"]}}
def show_image(self):
@@ -82,8 +82,8 @@ class PL_Seq2Seq(pl.LightningModule):
context_x, context_y, target_x, target_y = batch
y_dist = extra['y_dist']
# For test use a different loss: log_p over the next <24h, so it's a standard number of steps
loss = -y_dist.log_prob(target_y)[:, :24].mean()
# For test, use only the negative log-probability (-log_p) as the loss
loss = -y_dist.log_prob(target_y).mean()
tensorboard_logs = {"test_" + k: v for k, v in losses.items()}
assert torch.isfinite(loss)
return {"test_loss": loss, "log": tensorboard_logs}
@@ -91,11 +91,10 @@ class PL_Seq2Seq(pl.LightningModule):
def test_end(self, outputs):
outputs = agg_logs(outputs)
logger.info(
f"step {self.trainer.global_step}, {outputs}"
)
return {"test_loss": outputs["test_loss"], "log": outputs["log"]}
return {"test_loss": outputs["agg_test_loss"], "log": outputs["log"]}
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams["learning_rate"])
@@ -153,6 +152,7 @@ class PL_Seq2Seq(pl.LightningModule):
collate_fn=collate_fns(
self.hparams["num_context"], self.hparams["num_extra_target"], sample=False, context_in_target=self.hparams["context_in_target"]
),
num_workers=self.hparams["num_workers"],
)
@pl.data_loader
@@ -172,4 +172,5 @@ class PL_Seq2Seq(pl.LightningModule):
self.hparams["num_context"], self.hparams["num_extra_target"], sample=False, context_in_target=self.hparams["context_in_target"]
),
sampler=sampler,
num_workers=self.hparams["num_workers"],
)
@@ -25,8 +25,8 @@ class PL_NeuralProcess(PL_Seq2Seq):
'det_enc_self_attn_type': 'uniform',
'latent_enc_self_attn_type': 'uniform',
'num_heads_power': 3,
'hidden_dim_power': 5,
'latent_dim_power': 4,
'hidden_dim_power': 6,
'latent_dim_power': 5,
'n_decoder_layers': 4,
'n_latent_encoder_layers': 2,
'use_deterministic_path': False,
@@ -149,7 +149,6 @@ class PL_ANP(PL_NeuralProcess):
@staticmethod
def add_suggest(trial, user_attrs={}):
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_int("attention_layers", 1, 4)
trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
trial.suggest_discrete_uniform(
@@ -165,12 +164,6 @@ class PL_ANP(PL_NeuralProcess):
trial.suggest_uniform("dropout", 0, 0.9)
trial.suggest_uniform("attention_dropout", 0, 0.9)
trial.suggest_categorical(
"latent_enc_self_attn_type", ['uniform', 'multihead']
)
trial.suggest_categorical("det_enc_self_attn_type", ['uniform', 'multihead'])
trial.suggest_categorical("det_enc_cross_attn_type", ['uniform', 'multihead'])
trial.suggest_categorical("batchnorm", [False, True])
trial.suggest_categorical("use_deterministic_path", [False, True])
@@ -205,7 +198,6 @@ class PL_ANPRNN(PL_NeuralProcess):
@staticmethod
def add_suggest(trial, user_attrs={}):
trial.suggest_loguniform("learning_rate", 1e-6, 1e-2)
trial.suggest_int("attention_layers", 1, 4)
trial.suggest_discrete_uniform("num_heads_power", 2, 4, 1)
trial.suggest_discrete_uniform(
@@ -361,9 +361,9 @@ class NeuralProcess(nn.Module):
# Weight loss nearer to prediction time?
weight = (torch.arange(loss_p.shape[1]) + 1).float().to(device)[None, :]
loss_p_weighted = loss_p / torch.sqrt(weight) # We want to weight nearer stuff more
loss_p_weighted = loss_p / torch.sqrt(weight) # We want to weight nearer stuff more
loss_p_weighted = loss_p_weighted.mean()
loss = (loss_kl - log_p).mean()
loss_kl = loss_kl.mean()
log_p = log_p.mean()
@@ -377,4 +377,4 @@ class NeuralProcess(nn.Module):
loss_p_weighted = None
y_pred = dist.rsample() if self.training else dist.loc
return y_pred, dict(loss=loss, loss_p=loss_p, loss_kl=loss_kl, loss_mse=mse_loss, loss_p_weighted=loss_p_weighted), dict(log_sigma=log_sigma, dist=dist)
return y_pred, dict(loss=loss, loss_p=loss_p, loss_kl=loss_kl, loss_mse=mse_loss, loss_p_weighted=loss_p_weighted), dict(log_sigma=log_sigma, y_dist=dist)
-15
View File
@@ -35,21 +35,6 @@ class NetTransformer(nn.Module):
self.encoder = nn.TransformerEncoder(
layer_enc, num_layers=self.hparams.nlayers, norm=encoder_norm
)
# self.dec_norm = BatchNormSequence(self.hparams.x_dim)
# self.dec_emb = nn.Linear(self.hparams.x_dim, hidden_out_size)
# layer_dec = nn.TransformerDecoderLayer(
# d_model=hidden_out_size,
# dim_feedforward=self.hparams.hidden_size,
# dropout=self.hparams.attention_dropout,
# nhead=self.hparams.nhead,
# )
# decoder_norm = nn.LayerNorm(hidden_out_size)
# self.decoder = nn.TransformerDecoder(
# layer_dec,
# num_layers=self.hparams.nlayers,
# norm=decoder_norm
# )
self.mean = nn.Linear(hidden_out_size, self.hparams.y_dim)
self.std = nn.Linear(hidden_out_size, self.hparams.y_dim)
self._use_lvar = 0
+5 -1
View File
@@ -38,6 +38,10 @@ def plot_rows(
target_y_rows = np.exp(target_y_rows) - eps
context_y_rows = np.exp(context_y_rows) - eps
# I don't want to show too much context
context_y_rows = context_y_rows[-96:]
x_context_rows = x_context_rows[-96:]
# Plot everything
j = 0
label = "energy(kWh/hh)"
@@ -121,7 +125,7 @@ def plot_from_loader(
y_pred, losses, extra = model(context_x, context_y, target_x, target_y)
loss_test = losses["loss"] if "loss" in losses else 0.
y_std = extra["dist"].scale
y_std = extra["y_dist"].scale
if plot:
plt.figure()
+1 -1
View File
@@ -81,7 +81,7 @@ def objective(trial, PL_MODEL_CLS, name, user_attrs):
model.logger.experiment.add_hparams(trial.params, model.logger.metrics[-1])
model.logger.save()
return model.logger.metrics[-1]["test_loss"]
return model.logger.metrics[-1]["agg_test_loss"]
def add_number(trial: optuna.Trial, model_dir: Path):
+3 -2
View File
@@ -29,6 +29,7 @@ def agg_logs(outputs):
{'val_loss': 0.7047,
'log': {'val_loss': 0.7047, 'val_loss_p': 0.7047}},
]
-> {'agg_val_loss': 0.7126500010490417, 'log': {'agg_val_loss': 0.7126500010490417, 'agg_val_loss_p': 0.7126500010490417, 'agg_val_loss_kl': 2.6101499770447845e-06, 'agg_val_loss_mse': 0.17669999599456787}}
"""
if isinstance(outputs, dict):
@@ -41,7 +42,7 @@ def agg_logs(outputs):
# Take mean of sub dicts
keys = outputs[0][j].keys()
aggs[j] = {
k: torch.stack([x[j][k] for x in outputs if k in x[j]])
'agg_'+k: torch.stack([x[j][k] for x in outputs if k in x[j]])
.mean()
.cpu()
.item()
@@ -49,7 +50,7 @@ def agg_logs(outputs):
}
else:
# Take mean of numbers
aggs[j] = (
aggs['agg_'+j] = (
torch.stack([x[j] for x in outputs if j in x]).mean().cpu().item()
)
return aggs
+1023 -815
View File
File diff suppressed because one or more lines are too long
+6 -6
View File
@@ -21,14 +21,14 @@ def test_agg_logs():
]
r = neural_processes.utils.agg_logs(outputs)
assert isinstance(r, dict)
assert 'val_loss' in r.keys()
assert 'val_loss_kl' in r.keys()
assert isinstance(r['val_loss'], float)
assert 'agg_val_loss' in r.keys()
assert 'agg_val_loss_kl' in r['log'].keys()
assert isinstance(r['agg_val_loss'], float)
outputs = {'val_loss': torch.tensor(0.7206),
'log': {'val_loss': torch.tensor(0.7206), 'val_loss_p': torch.tensor(0.7206), 'val_loss_kl': torch.tensor(2.3812e-06), 'val_loss_mse': torch.tensor(0.1838)}}
r = neural_processes.utils.agg_logs(outputs)
assert isinstance(r, dict)
assert 'val_loss' in r.keys()
assert 'val_loss_kl' in r.keys()
assert isinstance(r['val_loss'], float)
assert 'agg_val_loss' in r.keys()
assert 'agg_val_loss_kl' in r['log'].keys()
assert isinstance(r['agg_val_loss'], float)