mirror of
https://github.com/wassname/torch-neuralpointprocess.git
synced 2026-06-27 17:02:55 +08:00
use pytorch lightning remove potential leakage, misc
This commit is contained in:
+2
-2
@@ -1,3 +1,4 @@
|
||||
/default/
|
||||
|
||||
# Created by https://www.gitignore.io/api/linux,python,windows,jupyternotebook
|
||||
|
||||
@@ -147,5 +148,4 @@ $RECYCLE.BIN/
|
||||
*.lnk
|
||||
|
||||
# End of https://www.gitignore.io/api/linux,python,windows,jupyternotebook
|
||||
.demo_cache.sqlite
|
||||
demo_cache.sqlite
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
-153
@@ -1,153 +0,0 @@
|
||||
# %reload_ext autoreload
|
||||
# %autoreload 2
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
plt.style.use('ggplot')
|
||||
plt.rcParams['figure.figsize'] = (12.0, 3)
|
||||
|
||||
import numpy as np
|
||||
import tqdm
|
||||
import torch
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from utils import read_timeseries,generate_sequence, plt_lmbda
|
||||
from module import GTPP
|
||||
from run import get_parser
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# +
|
||||
parser = get_parser()
|
||||
config = parser.parse_args([])
|
||||
|
||||
path = 'data/'
|
||||
|
||||
if config.data == 'exponential_hawkes':
|
||||
|
||||
train_data = read_timeseries(path + config.data + '_training.csv')
|
||||
val_data = read_timeseries(path + config.data + '_validation.csv')
|
||||
test_data = read_timeseries(path + config.data + '_testing.csv')
|
||||
else:
|
||||
raise NotImplemented('only exponential_hawkes')
|
||||
|
||||
|
||||
|
||||
train_timeseq, train_eventseq = generate_sequence(train_data, config.seq_len, log_mode=config.log_mode)
|
||||
train_loader = DataLoader(torch.utils.data.TensorDataset(train_timeseq, train_eventseq), shuffle=True, batch_size=config.batch_size)
|
||||
val_timeseq, val_eventseq = generate_sequence(val_data, config.seq_len, log_mode=config.log_mode)
|
||||
val_loader = DataLoader(torch.utils.data.TensorDataset(val_timeseq, val_eventseq), shuffle=False, batch_size=len(val_data))
|
||||
|
||||
model = GTPP(config)
|
||||
|
||||
best_loss = 1e3
|
||||
patients = 0
|
||||
tol = 333
|
||||
|
||||
for epoch in range(config.epochs):
|
||||
|
||||
model.train()
|
||||
|
||||
loss1 = loss2 = loss3 = 0
|
||||
|
||||
for batch in train_loader:
|
||||
loss, log_lmbda, int_lmbda, lmbda = model.train_batch(batch)
|
||||
|
||||
loss1 += loss
|
||||
loss2 += log_lmbda
|
||||
loss3 += int_lmbda
|
||||
|
||||
|
||||
model.eval()
|
||||
|
||||
for batch in val_loader:
|
||||
val_loss, val_log_lmbda, val_int_lmbda, _ = model(batch)
|
||||
|
||||
if best_loss > val_loss:
|
||||
best_loss = val_loss.item()
|
||||
else:
|
||||
patients += 1
|
||||
if patients >= tol:
|
||||
print("Early Stop")
|
||||
print("epoch", epoch)
|
||||
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
break
|
||||
|
||||
if epoch % config.prt_evry == 0:
|
||||
print("Epochs:{}".format(epoch))
|
||||
print("Training : Negative Log Likelihood:{:2.6f} Log Lambda:{:2.6f}: Integral Lambda:{:2.6f}".format(loss1/train_timeseq.size(0), -loss2 / train_timeseq.size(0), loss3 / train_timeseq.size(0)))
|
||||
print("Validation: Negative Log Likelihood:{:2.6f} Log Lambda:{:2.6f}: Integral Lambda:{:2.6f}".format(val_loss / val_timeseq.size(0),
|
||||
-val_log_lmbda / val_timeseq.size(0),
|
||||
val_int_lmbda/val_timeseq.size(0)))
|
||||
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt_lmbda(test_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
|
||||
|
||||
print("end")
|
||||
# -
|
||||
|
||||
|
||||
# +
|
||||
# class CryptoTraderPL_NLL(pl.LightningModule):
|
||||
# def __init__(self, config):
|
||||
# super().__init__()
|
||||
# self.config = config
|
||||
# self._model = GTPP(config)
|
||||
|
||||
# def forward(self, x):
|
||||
# return self._model(x)
|
||||
|
||||
# def training_step(self, batch, batch_idx, phase='train'):
|
||||
# """
|
||||
# Training step which runs for N steps, and get loss over all of them
|
||||
# """
|
||||
# x, l, r = batch
|
||||
# y_pred = self._model(x)
|
||||
|
||||
# # we have multiple targets. So move them to batch
|
||||
# l2 = l.reshape(-1)
|
||||
# y_pred2 = y_pred.reshape((*l2.shape, 3))
|
||||
# loss = F.nll_loss(y_pred2, l2)
|
||||
|
||||
# # record weights
|
||||
# self.log_dict({
|
||||
# f'loss/{phase}': loss,
|
||||
# }, prog_bar=True)
|
||||
|
||||
# assert torch.isfinite(loss)
|
||||
# return loss
|
||||
|
||||
# def validation_step(self, batch, batch_idx):
|
||||
# return self.training_step(batch, batch_idx, phase='val')
|
||||
|
||||
# def predict_step(self, batch, batch_idx):
|
||||
# x, y, r = batch
|
||||
# y_pred = self.forward(x)
|
||||
# return y_pred, y, r
|
||||
|
||||
# def configure_optimizers(self):
|
||||
# optim = Ranger21(self.parameters(),
|
||||
# lr=self.train_kwargs['lr'],
|
||||
# num_epochs=num_epochs,
|
||||
# num_batches_per_epoch=num_batches_per_epoch,
|
||||
# weight_decay=self.train_kwargs['weight_decay'])
|
||||
# return {'optimizer': optim, 'monitor': 'loss/val'}
|
||||
# -
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+2885
File diff suppressed because one or more lines are too long
+181
@@ -0,0 +1,181 @@
|
||||
# %reload_ext autoreload
|
||||
# %autoreload 2
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
# %matplotlib inline
|
||||
plt.style.use('ggplot')
|
||||
plt.rcParams['figure.figsize'] = (12.0, 3)
|
||||
|
||||
import numpy as np
|
||||
import tqdm
|
||||
import torch
|
||||
from torch.utils.data import DataLoader
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
|
||||
from utils import read_timeseries,generate_sequence, plt_lmbda
|
||||
from module import GTPP
|
||||
from run import get_parser
|
||||
|
||||
parser = get_parser()
|
||||
argv = """
|
||||
--epochs=100
|
||||
""".replace('\n', '').split()
|
||||
config = parser.parse_args(argv)
|
||||
config
|
||||
|
||||
# # Data
|
||||
|
||||
# +
|
||||
|
||||
path = 'data/'
|
||||
|
||||
if config.data == 'exponential_hawkes':
|
||||
|
||||
train_data = read_timeseries(path + config.data + '_training.csv')
|
||||
val_data = read_timeseries(path + config.data + '_validation.csv')
|
||||
test_data = read_timeseries(path + config.data + '_testing.csv')
|
||||
else:
|
||||
# NotImplemented is a non-callable constant (calling it raises TypeError);
# NotImplementedError is the exception meant for unsupported options.
raise NotImplementedError('only exponential_hawkes')
|
||||
|
||||
|
||||
train_timeseq, train_eventseq = generate_sequence(train_data, config.seq_len, log_mode=config.log_mode)
|
||||
train_loader = DataLoader(torch.utils.data.TensorDataset(train_timeseq, train_eventseq), shuffle=True, batch_size=config.batch_size)
|
||||
|
||||
val_timeseq, val_eventseq = generate_sequence(val_data, config.seq_len, log_mode=config.log_mode)
|
||||
val_loader = DataLoader(torch.utils.data.TensorDataset(val_timeseq, val_eventseq), shuffle=False, batch_size=len(val_data))
|
||||
|
||||
# -
|
||||
|
||||
# # Model
|
||||
|
||||
import torch.optim
|
||||
|
||||
|
||||
# +
|
||||
import pytorch_lightning as pl
|
||||
|
||||
class CryptoTraderNPP(pl.LightningModule):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
self.config = config
|
||||
self._model = GTPP(config)
|
||||
|
||||
def forward(self, x):
|
||||
return self._model(x)
|
||||
|
||||
def training_step(self, batch, batch_idx, phase='train'):
|
||||
torch.set_grad_enabled(True) # we need grad even in val and test (IntensityNet.forward calls target_time.requires_grad_(True) and the module imports torch.autograd.grad)
|
||||
|
||||
loss, log_lmbda, int_lmbda, lmbda = self._model(batch)
|
||||
|
||||
if phase!='train':
|
||||
# free the graph, free mem
|
||||
loss = loss.detach()
|
||||
|
||||
# record weights
|
||||
self.log_dict({
|
||||
f'loss/{phase}': loss,
|
||||
}, prog_bar=True)
|
||||
|
||||
assert torch.isfinite(loss)
|
||||
return loss
|
||||
|
||||
def validation_step(self, batch, batch_idx):
|
||||
return self.training_step(batch, batch_idx, phase='val')
|
||||
|
||||
def predict_step(self, batch, batch_idx):
|
||||
y_pred = self.forward(batch)
|
||||
# on predict we want to return multiple values, not just the loss
|
||||
return (y_pred, *batch)
|
||||
|
||||
def on_phase_end(self) -> None:
|
||||
# this seems to help with cuda memory
|
||||
self._model.zero_grad()
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
def on_train_end(self):
|
||||
self.on_phase_end()
|
||||
|
||||
def on_validation_end(self):
|
||||
self.on_phase_end()
|
||||
|
||||
def on_predict_end(self):
|
||||
self.on_phase_end()
|
||||
|
||||
def on_epoch_end(self):
|
||||
if self.trainer.current_epoch%5==0:
|
||||
i=0
|
||||
device = self.device
|
||||
self.eval().cpu()
|
||||
plt.title(f'train {i} e={self.trainer.current_epoch}')
|
||||
plt_lmbda(train_data[i], model=self, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt.show()
|
||||
|
||||
plt.title(f'val {i} e={self.trainer.current_epoch}')
|
||||
plt_lmbda(val_data[i], model=self, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt.show()
|
||||
|
||||
# Restore this module (not the notebook-global `model`) to the device it was
# on before plotting, and switch it back to training mode.
self.to(device).train()
|
||||
|
||||
def configure_optimizers(self):
|
||||
# Read the learning rate from the config stored on the module in __init__
# rather than from the notebook-global `config`.
optim = torch.optim.Adam(self.parameters(), lr=self.config.lr)
|
||||
return {'optimizer': optim, 'monitor': 'loss/val'}
|
||||
|
||||
|
||||
# -
|
||||
model = CryptoTraderNPP(config)
|
||||
model
|
||||
|
||||
# # Train
|
||||
|
||||
import pytorch_lightning as pl
|
||||
from pytorch_lightning.loggers import CSVLogger
|
||||
|
||||
trainer = pl.Trainer(
|
||||
max_epochs=config.epochs,
|
||||
gpus=1,
|
||||
logger=[
|
||||
CSVLogger('../outputs/logs')
|
||||
],
|
||||
)
|
||||
|
||||
trainer.fit(model, train_loader, val_loader)
|
||||
|
||||
# # Hist
|
||||
|
||||
csv_logger = trainer.logger[0]
|
||||
hp = Path(csv_logger.experiment.metrics_file_path)
|
||||
df = pd.read_csv(hp).groupby('epoch').min()[['loss/train', 'loss/val']]
|
||||
df.plot(logy=True)
|
||||
plt.show()
|
||||
df.plot()
|
||||
|
||||
# # Plot
|
||||
|
||||
|
||||
# +
|
||||
i=0
|
||||
|
||||
plt.title(f'train {i}')
|
||||
plt_lmbda(train_data[i], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt.show()
|
||||
|
||||
plt.title(f'val {i}')
|
||||
plt_lmbda(val_data[i], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt.show()
|
||||
# -
|
||||
|
||||
plt.title(f'train {i}')
|
||||
# Plot with the opposite log_mode setting. `~` on a bool yields -2 / -1, both
# truthy, so it never flips the flag; `not` performs the logical inversion.
# NOTE(review): assumes config.log_mode is a boolean flag - confirm in run.get_parser.
plt_lmbda(train_data[i], model=model, seq_len=config.seq_len, log_mode=not config.log_mode)
|
||||
plt.show()
|
||||
|
||||
plt.title(f'train {i}')
|
||||
plt_lmbda(train_data[i], model=model, alpha=0.01, lmbda0=0, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
plt.show()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ from torch import nn
|
||||
from torch.autograd import grad
|
||||
from torch.optim import Adam
|
||||
from torch.nn import functional as F
|
||||
from optimization import BertAdam
|
||||
# from optimization import BertAdam
|
||||
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
@@ -21,12 +21,20 @@ class IntensityNet(nn.Module):
|
||||
self.mean_first = config.mean_first
|
||||
self.log_t = config.log_t
|
||||
|
||||
self.init_weights_positive()
|
||||
|
||||
def init_weights_positive(self):
|
||||
eps = 1e-10
|
||||
for p in self.parameters():
|
||||
p.data = torch.abs(p.data)
|
||||
p.data = torch.clamp(p.data, min=eps)
|
||||
|
||||
|
||||
def forward(self, hidden_state, target_time):
|
||||
eps = 1e-10
|
||||
|
||||
for p in self.parameters():
|
||||
p.data *= (p.data>=0)
|
||||
p.data = torch.clamp(p.data, min=eps)
|
||||
|
||||
target_time.requires_grad_(True)
|
||||
if self.log_t:
|
||||
@@ -53,6 +61,7 @@ class IntensityNet(nn.Module):
|
||||
|
||||
return [nll, log_lmbda_mean, int_lmbda_mean, lmbda]
|
||||
|
||||
LEAK=1
|
||||
class GTPP(nn.Module):
|
||||
|
||||
def __init__(self, config):
|
||||
@@ -66,22 +75,22 @@ class GTPP(nn.Module):
|
||||
|
||||
self.embedding = nn.Embedding(num_embeddings=config.event_class, embedding_dim=config.emb_dim)
|
||||
self.emb_drop = nn.Dropout(p=config.dropout)
|
||||
self.lstm = nn.LSTM(input_size=1+config.emb_dim,
|
||||
self.lstm = nn.LSTM(input_size=LEAK+config.emb_dim,
|
||||
hidden_size=config.hid_dim,
|
||||
batch_first=True,
|
||||
bidirectional=False)
|
||||
self.intensity_net = IntensityNet(config)
|
||||
self.set_optimizer(total_step=1)
|
||||
# self.set_optimizer(total_step=1)
|
||||
|
||||
|
||||
def set_optimizer(self, total_step, use_bert=False):
|
||||
if use_bert:
|
||||
self.set_optimizer = BertAdam(params=self.parameters(),
|
||||
lr=self.lr,
|
||||
warmup=0.1,
|
||||
t_total=total_step)
|
||||
else:
|
||||
self.set_optimizer = Adam(self.parameters(), lr=self.lr)
|
||||
# def set_optimizer(self, total_step, use_bert=False):
|
||||
# if use_bert:
|
||||
# self.set_optimizer = BertAdam(params=self.parameters(),
|
||||
# lr=self.lr,
|
||||
# warmup=0.1,
|
||||
# t_total=total_step)
|
||||
# else:
|
||||
# self.set_optimizer = Adam(self.parameters(), lr=self.lr)
|
||||
|
||||
|
||||
def forward(self, batch):
|
||||
@@ -89,31 +98,27 @@ class GTPP(nn.Module):
|
||||
event_seq = event_seq.long()
|
||||
emb = self.embedding(event_seq)
|
||||
emb = self.emb_drop(emb)
|
||||
lstm_input = torch.cat([emb, time_seq.unsqueeze(-1)], dim=-1)
|
||||
if LEAK:
|
||||
lstm_input = torch.cat([emb[:, :-1], time_seq[:, :-1].unsqueeze(-1)], dim=-1)
|
||||
else:
|
||||
lstm_input = emb
|
||||
hidden_state, _ = self.lstm(lstm_input)
|
||||
|
||||
# FIXME wait we pass the target time into the LSTM. Is this data leakage?
|
||||
nll, log_lmbda, int_lmbda, lmbda = self.intensity_net(hidden_state, time_seq[:, -1])
|
||||
|
||||
return [nll, log_lmbda.detach(), int_lmbda.detach(), lmbda.detach()]
|
||||
|
||||
|
||||
def train_batch(self, batch):
|
||||
|
||||
self.set_optimizer.zero_grad()
|
||||
nll, log_lmbda, int_lmbda, lmbda = self.forward(batch)
|
||||
loss = nll
|
||||
loss.backward()
|
||||
self.set_optimizer.step()
|
||||
|
||||
return nll.item(), log_lmbda.item(), int_lmbda.item(), lmbda
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def train_batch(self, batch):
|
||||
|
||||
# self.set_optimizer.zero_grad()
|
||||
# nll, log_lmbda, int_lmbda, lmbda = self.forward(batch)
|
||||
# loss = nll
|
||||
# loss.backward()
|
||||
# self.set_optimizer.step()
|
||||
|
||||
# return nll.item(), log_lmbda.item(), int_lmbda.item(), lmbda
|
||||
|
||||
|
||||
|
||||
|
||||
+260
-260
@@ -1,304 +1,304 @@
|
||||
# coding=utf-8
|
||||
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""PyTorch optimization for BERT model."""
|
||||
# # coding=utf-8
|
||||
# # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
|
||||
# #
|
||||
# # Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# # you may not use this file except in compliance with the License.
|
||||
# # You may obtain a copy of the License at
|
||||
# #
|
||||
# # http://www.apache.org/licenses/LICENSE-2.0
|
||||
# #
|
||||
# # Unless required by applicable law or agreed to in writing, software
|
||||
# # distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# # See the License for the specific language governing permissions and
|
||||
# # limitations under the License.
|
||||
# """PyTorch optimization for BERT model."""
|
||||
|
||||
import math
|
||||
import torch
|
||||
# import math
|
||||
# import torch
|
||||
|
||||
from torch.optim import Optimizer
|
||||
from torch.optim.optimizer import required
|
||||
from torch.nn.utils import clip_grad_norm_
|
||||
import logging
|
||||
import abc
|
||||
import sys
|
||||
# from torch.optim import Optimizer
|
||||
# from torch.optim.optimizer import required
|
||||
# from torch.nn.utils import clip_grad_norm_
|
||||
# import logging
|
||||
# import abc
|
||||
# import sys
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
if sys.version_info >= (3, 4):
|
||||
ABC = abc.ABC
|
||||
else:
|
||||
ABC = abc.ABCMeta('ABC', (), {})
|
||||
# if sys.version_info >= (3, 4):
|
||||
# ABC = abc.ABC
|
||||
# else:
|
||||
# ABC = abc.ABCMeta('ABC', (), {})
|
||||
|
||||
|
||||
class _LRSchedule(ABC):
|
||||
""" Parent of all LRSchedules here. """
|
||||
warn_t_total = False # is set to True for schedules where progressing beyond t_total steps doesn't make sense
|
||||
def __init__(self, warmup=0.002, t_total=-1, **kw):
|
||||
"""
|
||||
:param warmup: what fraction of t_total steps will be used for linear warmup
|
||||
:param t_total: how many training steps (updates) are planned
|
||||
:param kw:
|
||||
"""
|
||||
super(_LRSchedule, self).__init__(**kw)
|
||||
if t_total < 0:
|
||||
logger.warning("t_total value of {} results in schedule not being applied".format(t_total))
|
||||
if not 0.0 <= warmup < 1.0 and not warmup == -1:
|
||||
raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup))
|
||||
warmup = max(warmup, 0.)
|
||||
self.warmup, self.t_total = float(warmup), float(t_total)
|
||||
self.warned_for_t_total_at_progress = -1
|
||||
# class _LRSchedule(ABC):
|
||||
# """ Parent of all LRSchedules here. """
|
||||
# warn_t_total = False # is set to True for schedules where progressing beyond t_total steps doesn't make sense
|
||||
# def __init__(self, warmup=0.002, t_total=-1, **kw):
|
||||
# """
|
||||
# :param warmup: what fraction of t_total steps will be used for linear warmup
|
||||
# :param t_total: how many training steps (updates) are planned
|
||||
# :param kw:
|
||||
# """
|
||||
# super(_LRSchedule, self).__init__(**kw)
|
||||
# if t_total < 0:
|
||||
# logger.warning("t_total value of {} results in schedule not being applied".format(t_total))
|
||||
# if not 0.0 <= warmup < 1.0 and not warmup == -1:
|
||||
# raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup))
|
||||
# warmup = max(warmup, 0.)
|
||||
# self.warmup, self.t_total = float(warmup), float(t_total)
|
||||
# self.warned_for_t_total_at_progress = -1
|
||||
|
||||
def get_lr(self, step, nowarn=False):
|
||||
"""
|
||||
:param step: which of t_total steps we're on
|
||||
:param nowarn: set to True to suppress warning regarding training beyond specified 't_total' steps
|
||||
:return: learning rate multiplier for current update
|
||||
"""
|
||||
if self.t_total < 0:
|
||||
return 1.
|
||||
progress = float(step) / self.t_total
|
||||
ret = self.get_lr_(progress)
|
||||
# warning for exceeding t_total (only active with warmup_linear
|
||||
if not nowarn and self.warn_t_total and progress > 1. and progress > self.warned_for_t_total_at_progress:
|
||||
logger.warning(
|
||||
"Training beyond specified 't_total'. Learning rate multiplier set to {}. Please set 't_total' of {} correctly."
|
||||
.format(ret, self.__class__.__name__))
|
||||
self.warned_for_t_total_at_progress = progress
|
||||
# end warning
|
||||
return ret
|
||||
# def get_lr(self, step, nowarn=False):
|
||||
# """
|
||||
# :param step: which of t_total steps we're on
|
||||
# :param nowarn: set to True to suppress warning regarding training beyond specified 't_total' steps
|
||||
# :return: learning rate multiplier for current update
|
||||
# """
|
||||
# if self.t_total < 0:
|
||||
# return 1.
|
||||
# progress = float(step) / self.t_total
|
||||
# ret = self.get_lr_(progress)
|
||||
# # warning for exceeding t_total (only active with warmup_linear
|
||||
# if not nowarn and self.warn_t_total and progress > 1. and progress > self.warned_for_t_total_at_progress:
|
||||
# logger.warning(
|
||||
# "Training beyond specified 't_total'. Learning rate multiplier set to {}. Please set 't_total' of {} correctly."
|
||||
# .format(ret, self.__class__.__name__))
|
||||
# self.warned_for_t_total_at_progress = progress
|
||||
# # end warning
|
||||
# return ret
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_lr_(self, progress):
|
||||
"""
|
||||
:param progress: value between 0 and 1 (unless going beyond t_total steps) specifying training progress
|
||||
:return: learning rate multiplier for current update
|
||||
"""
|
||||
return 1.
|
||||
# @abc.abstractmethod
|
||||
# def get_lr_(self, progress):
|
||||
# """
|
||||
# :param progress: value between 0 and 1 (unless going beyond t_total steps) specifying training progress
|
||||
# :return: learning rate multiplier for current update
|
||||
# """
|
||||
# return 1.
|
||||
|
||||
|
||||
class ConstantLR(_LRSchedule):
|
||||
def get_lr_(self, progress):
|
||||
return 1.
|
||||
# class ConstantLR(_LRSchedule):
|
||||
# def get_lr_(self, progress):
|
||||
# return 1.
|
||||
|
||||
|
||||
class WarmupCosineSchedule(_LRSchedule):
|
||||
"""
|
||||
Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
Decreases learning rate from 1. to 0. over remaining `1 - warmup` steps following a cosine curve.
|
||||
If `cycles` (default=0.5) is different from default, learning rate follows cosine function after warmup.
|
||||
"""
|
||||
warn_t_total = True
|
||||
def __init__(self, warmup=0.002, t_total=-1, cycles=.5, **kw):
|
||||
"""
|
||||
:param warmup: see LRSchedule
|
||||
:param t_total: see LRSchedule
|
||||
:param cycles: number of cycles. Default: 0.5, corresponding to cosine decay from 1. at progress==warmup and 0 at progress==1.
|
||||
:param kw:
|
||||
"""
|
||||
super(WarmupCosineSchedule, self).__init__(warmup=warmup, t_total=t_total, **kw)
|
||||
self.cycles = cycles
|
||||
# class WarmupCosineSchedule(_LRSchedule):
|
||||
# """
|
||||
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
# Decreases learning rate from 1. to 0. over remaining `1 - warmup` steps following a cosine curve.
|
||||
# If `cycles` (default=0.5) is different from default, learning rate follows cosine function after warmup.
|
||||
# """
|
||||
# warn_t_total = True
|
||||
# def __init__(self, warmup=0.002, t_total=-1, cycles=.5, **kw):
|
||||
# """
|
||||
# :param warmup: see LRSchedule
|
||||
# :param t_total: see LRSchedule
|
||||
# :param cycles: number of cycles. Default: 0.5, corresponding to cosine decay from 1. at progress==warmup and 0 at progress==1.
|
||||
# :param kw:
|
||||
# """
|
||||
# super(WarmupCosineSchedule, self).__init__(warmup=warmup, t_total=t_total, **kw)
|
||||
# self.cycles = cycles
|
||||
|
||||
def get_lr_(self, progress):
|
||||
if progress < self.warmup:
|
||||
return progress / self.warmup
|
||||
else:
|
||||
progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
return 0.5 * (1. + math.cos(math.pi * self.cycles * 2 * progress))
|
||||
# def get_lr_(self, progress):
|
||||
# if progress < self.warmup:
|
||||
# return progress / self.warmup
|
||||
# else:
|
||||
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
# return 0.5 * (1. + math.cos(math.pi * self.cycles * 2 * progress))
|
||||
|
||||
|
||||
class WarmupCosineWithHardRestartsSchedule(WarmupCosineSchedule):
|
||||
"""
|
||||
Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
If `cycles` (default=1.) is different from default, learning rate follows `cycles` times a cosine decaying
|
||||
learning rate (with hard restarts).
|
||||
"""
|
||||
def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
|
||||
super(WarmupCosineWithHardRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
|
||||
assert(cycles >= 1.)
|
||||
# class WarmupCosineWithHardRestartsSchedule(WarmupCosineSchedule):
|
||||
# """
|
||||
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
# If `cycles` (default=1.) is different from default, learning rate follows `cycles` times a cosine decaying
|
||||
# learning rate (with hard restarts).
|
||||
# """
|
||||
# def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
|
||||
# super(WarmupCosineWithHardRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
|
||||
# assert(cycles >= 1.)
|
||||
|
||||
def get_lr_(self, progress):
|
||||
if progress < self.warmup:
|
||||
return progress / self.warmup
|
||||
else:
|
||||
progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
ret = 0.5 * (1. + math.cos(math.pi * ((self.cycles * progress) % 1)))
|
||||
return ret
|
||||
# def get_lr_(self, progress):
|
||||
# if progress < self.warmup:
|
||||
# return progress / self.warmup
|
||||
# else:
|
||||
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
# ret = 0.5 * (1. + math.cos(math.pi * ((self.cycles * progress) % 1)))
|
||||
# return ret
|
||||
|
||||
|
||||
class WarmupCosineWithWarmupRestartsSchedule(WarmupCosineWithHardRestartsSchedule):
|
||||
"""
|
||||
All training progress is divided in `cycles` (default=1.) parts of equal length.
|
||||
Every part follows a schedule with the first `warmup` fraction of the training steps linearly increasing from 0. to 1.,
|
||||
followed by a learning rate decreasing from 1. to 0. following a cosine curve.
|
||||
"""
|
||||
def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
|
||||
assert(warmup * cycles < 1.)
|
||||
warmup = warmup * cycles if warmup >= 0 else warmup
|
||||
super(WarmupCosineWithWarmupRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
|
||||
# class WarmupCosineWithWarmupRestartsSchedule(WarmupCosineWithHardRestartsSchedule):
|
||||
# """
|
||||
# All training progress is divided in `cycles` (default=1.) parts of equal length.
|
||||
# Every part follows a schedule with the first `warmup` fraction of the training steps linearly increasing from 0. to 1.,
|
||||
# followed by a learning rate decreasing from 1. to 0. following a cosine curve.
|
||||
# """
|
||||
# def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
|
||||
# assert(warmup * cycles < 1.)
|
||||
# warmup = warmup * cycles if warmup >= 0 else warmup
|
||||
# super(WarmupCosineWithWarmupRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
|
||||
|
||||
def get_lr_(self, progress):
|
||||
progress = progress * self.cycles % 1.
|
||||
if progress < self.warmup:
|
||||
return progress / self.warmup
|
||||
else:
|
||||
progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
ret = 0.5 * (1. + math.cos(math.pi * progress))
|
||||
return ret
|
||||
# def get_lr_(self, progress):
|
||||
# progress = progress * self.cycles % 1.
|
||||
# if progress < self.warmup:
|
||||
# return progress / self.warmup
|
||||
# else:
|
||||
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
|
||||
# ret = 0.5 * (1. + math.cos(math.pi * progress))
|
||||
# return ret
|
||||
|
||||
|
||||
class WarmupConstantSchedule(_LRSchedule):
|
||||
"""
|
||||
Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
Keeps learning rate equal to 1. after warmup.
|
||||
"""
|
||||
def get_lr_(self, progress):
|
||||
if progress < self.warmup:
|
||||
return progress / self.warmup
|
||||
return 1.
|
||||
# class WarmupConstantSchedule(_LRSchedule):
|
||||
# """
|
||||
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
# Keeps learning rate equal to 1. after warmup.
|
||||
# """
|
||||
# def get_lr_(self, progress):
|
||||
# if progress < self.warmup:
|
||||
# return progress / self.warmup
|
||||
# return 1.
|
||||
|
||||
|
||||
class WarmupLinearSchedule(_LRSchedule):
|
||||
"""
|
||||
Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
Linearly decreases learning rate from 1. to 0. over remaining `1 - warmup` steps.
|
||||
"""
|
||||
warn_t_total = True
|
||||
def get_lr_(self, progress):
|
||||
if progress < self.warmup:
|
||||
return progress / self.warmup
|
||||
return max((progress - 1.) / (self.warmup - 1.), 0.)
|
||||
# class WarmupLinearSchedule(_LRSchedule):
|
||||
# """
|
||||
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
|
||||
# Linearly decreases learning rate from 1. to 0. over remaining `1 - warmup` steps.
|
||||
# """
|
||||
# warn_t_total = True
|
||||
# def get_lr_(self, progress):
|
||||
# if progress < self.warmup:
|
||||
# return progress / self.warmup
|
||||
# return max((progress - 1.) / (self.warmup - 1.), 0.)
|
||||
|
||||
|
||||
SCHEDULES = {
|
||||
None: ConstantLR,
|
||||
"none": ConstantLR,
|
||||
"warmup_cosine": WarmupCosineSchedule,
|
||||
"warmup_constant": WarmupConstantSchedule,
|
||||
"warmup_linear": WarmupLinearSchedule
|
||||
}
|
||||
# SCHEDULES = {
|
||||
# None: ConstantLR,
|
||||
# "none": ConstantLR,
|
||||
# "warmup_cosine": WarmupCosineSchedule,
|
||||
# "warmup_constant": WarmupConstantSchedule,
|
||||
# "warmup_linear": WarmupLinearSchedule
|
||||
# }
|
||||
|
||||
|
||||
class BertAdam(Optimizer):
|
||||
"""Implements BERT version of Adam algorithm with weight decay fix.
|
||||
Params:
|
||||
lr: learning rate
|
||||
warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
|
||||
t_total: total number of training steps for the learning
|
||||
rate schedule, -1 means constant learning rate of 1. (no warmup regardless of warmup setting). Default: -1
|
||||
schedule: schedule to use for the warmup (see above).
|
||||
Can be `'warmup_linear'`, `'warmup_constant'`, `'warmup_cosine'`, `'none'`, `None` or a `_LRSchedule` object (see below).
|
||||
If `None` or `'none'`, learning rate is always kept constant.
|
||||
Default : `'warmup_linear'`
|
||||
b1: Adams b1. Default: 0.9
|
||||
b2: Adams b2. Default: 0.999
|
||||
e: Adams epsilon. Default: 1e-6
|
||||
weight_decay: Weight decay. Default: 0.01
|
||||
max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
|
||||
"""
|
||||
def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
|
||||
b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, max_grad_norm=1.0, **kwargs):
|
||||
if lr is not required and lr < 0.0:
|
||||
raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
|
||||
if not isinstance(schedule, _LRSchedule) and schedule not in SCHEDULES:
|
||||
raise ValueError("Invalid schedule parameter: {}".format(schedule))
|
||||
if not 0.0 <= b1 < 1.0:
|
||||
raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1))
|
||||
if not 0.0 <= b2 < 1.0:
|
||||
raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2))
|
||||
if not e >= 0.0:
|
||||
raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e))
|
||||
# initialize schedule object
|
||||
if not isinstance(schedule, _LRSchedule):
|
||||
schedule_type = SCHEDULES[schedule]
|
||||
schedule = schedule_type(warmup=warmup, t_total=t_total)
|
||||
else:
|
||||
if warmup != -1 or t_total != -1:
|
||||
logger.warning("warmup and t_total on the optimizer are ineffective when _LRSchedule object is provided as schedule. "
|
||||
"Please specify custom warmup and t_total in _LRSchedule object.")
|
||||
defaults = dict(lr=lr, schedule=schedule,
|
||||
b1=b1, b2=b2, e=e, weight_decay=weight_decay,
|
||||
max_grad_norm=max_grad_norm)
|
||||
super(BertAdam, self).__init__(params, defaults)
|
||||
# class BertAdam(Optimizer):
|
||||
# """Implements BERT version of Adam algorithm with weight decay fix.
|
||||
# Params:
|
||||
# lr: learning rate
|
||||
# warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
|
||||
# t_total: total number of training steps for the learning
|
||||
# rate schedule, -1 means constant learning rate of 1. (no warmup regardless of warmup setting). Default: -1
|
||||
# schedule: schedule to use for the warmup (see above).
|
||||
# Can be `'warmup_linear'`, `'warmup_constant'`, `'warmup_cosine'`, `'none'`, `None` or a `_LRSchedule` object (see below).
|
||||
# If `None` or `'none'`, learning rate is always kept constant.
|
||||
# Default : `'warmup_linear'`
|
||||
# b1: Adams b1. Default: 0.9
|
||||
# b2: Adams b2. Default: 0.999
|
||||
# e: Adams epsilon. Default: 1e-6
|
||||
# weight_decay: Weight decay. Default: 0.01
|
||||
# max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
|
||||
# """
|
||||
# def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
|
||||
# b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, max_grad_norm=1.0, **kwargs):
|
||||
# if lr is not required and lr < 0.0:
|
||||
# raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
|
||||
# if not isinstance(schedule, _LRSchedule) and schedule not in SCHEDULES:
|
||||
# raise ValueError("Invalid schedule parameter: {}".format(schedule))
|
||||
# if not 0.0 <= b1 < 1.0:
|
||||
# raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1))
|
||||
# if not 0.0 <= b2 < 1.0:
|
||||
# raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2))
|
||||
# if not e >= 0.0:
|
||||
# raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e))
|
||||
# # initialize schedule object
|
||||
# if not isinstance(schedule, _LRSchedule):
|
||||
# schedule_type = SCHEDULES[schedule]
|
||||
# schedule = schedule_type(warmup=warmup, t_total=t_total)
|
||||
# else:
|
||||
# if warmup != -1 or t_total != -1:
|
||||
# logger.warning("warmup and t_total on the optimizer are ineffective when _LRSchedule object is provided as schedule. "
|
||||
# "Please specify custom warmup and t_total in _LRSchedule object.")
|
||||
# defaults = dict(lr=lr, schedule=schedule,
|
||||
# b1=b1, b2=b2, e=e, weight_decay=weight_decay,
|
||||
# max_grad_norm=max_grad_norm)
|
||||
# super(BertAdam, self).__init__(params, defaults)
|
||||
|
||||
def get_lr(self):
|
||||
lr = []
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
state = self.state[p]
|
||||
if len(state) == 0:
|
||||
return [0]
|
||||
lr_scheduled = group['lr']
|
||||
lr_scheduled *= group['schedule'].get_lr(state['step'])
|
||||
lr.append(lr_scheduled)
|
||||
return lr
|
||||
# def get_lr(self):
|
||||
# lr = []
|
||||
# for group in self.param_groups:
|
||||
# for p in group['params']:
|
||||
# state = self.state[p]
|
||||
# if len(state) == 0:
|
||||
# return [0]
|
||||
# lr_scheduled = group['lr']
|
||||
# lr_scheduled *= group['schedule'].get_lr(state['step'])
|
||||
# lr.append(lr_scheduled)
|
||||
# return lr
|
||||
|
||||
def step(self, closure=None):
|
||||
"""Performs a single optimization step.
|
||||
# def step(self, closure=None):
|
||||
# """Performs a single optimization step.
|
||||
|
||||
Arguments:
|
||||
closure (callable, optional): A closure that reevaluates the model
|
||||
and returns the loss.
|
||||
"""
|
||||
loss = None
|
||||
if closure is not None:
|
||||
loss = closure()
|
||||
# Arguments:
|
||||
# closure (callable, optional): A closure that reevaluates the model
|
||||
# and returns the loss.
|
||||
# """
|
||||
# loss = None
|
||||
# if closure is not None:
|
||||
# loss = closure()
|
||||
|
||||
for group in self.param_groups:
|
||||
for p in group['params']:
|
||||
if p.grad is None:
|
||||
continue
|
||||
grad = p.grad.data
|
||||
if grad.is_sparse:
|
||||
raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
|
||||
# for group in self.param_groups:
|
||||
# for p in group['params']:
|
||||
# if p.grad is None:
|
||||
# continue
|
||||
# grad = p.grad.data
|
||||
# if grad.is_sparse:
|
||||
# raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
|
||||
|
||||
state = self.state[p]
|
||||
# state = self.state[p]
|
||||
|
||||
# State initialization
|
||||
if len(state) == 0:
|
||||
state['step'] = 0
|
||||
# Exponential moving average of gradient values
|
||||
state['next_m'] = torch.zeros_like(p.data)
|
||||
# Exponential moving average of squared gradient values
|
||||
state['next_v'] = torch.zeros_like(p.data)
|
||||
# # State initialization
|
||||
# if len(state) == 0:
|
||||
# state['step'] = 0
|
||||
# # Exponential moving average of gradient values
|
||||
# state['next_m'] = torch.zeros_like(p.data)
|
||||
# # Exponential moving average of squared gradient values
|
||||
# state['next_v'] = torch.zeros_like(p.data)
|
||||
|
||||
next_m, next_v = state['next_m'], state['next_v']
|
||||
beta1, beta2 = group['b1'], group['b2']
|
||||
# next_m, next_v = state['next_m'], state['next_v']
|
||||
# beta1, beta2 = group['b1'], group['b2']
|
||||
|
||||
# Add grad clipping
|
||||
if group['max_grad_norm'] > 0:
|
||||
clip_grad_norm_(p, group['max_grad_norm'])
|
||||
# # Add grad clipping
|
||||
# if group['max_grad_norm'] > 0:
|
||||
# clip_grad_norm_(p, group['max_grad_norm'])
|
||||
|
||||
# Decay the first and second moment running average coefficient
|
||||
# In-place operations to update the averages at the same time
|
||||
next_m.mul_(beta1).add_(1 - beta1, grad)
|
||||
next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad)
|
||||
update = next_m / (next_v.sqrt() + group['e'])
|
||||
# # Decay the first and second moment running average coefficient
|
||||
# # In-place operations to update the averages at the same time
|
||||
# next_m.mul_(beta1).add_(1 - beta1, grad)
|
||||
# next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad)
|
||||
# update = next_m / (next_v.sqrt() + group['e'])
|
||||
|
||||
# Just adding the square of the weights to the loss function is *not*
|
||||
# the correct way of using L2 regularization/weight decay with Adam,
|
||||
# since that will interact with the m and v parameters in strange ways.
|
||||
#
|
||||
# Instead we want to decay the weights in a manner that doesn't interact
|
||||
# with the m/v parameters. This is equivalent to adding the square
|
||||
# of the weights to the loss with plain (non-momentum) SGD.
|
||||
if group['weight_decay'] > 0.0:
|
||||
update += group['weight_decay'] * p.data
|
||||
# # Just adding the square of the weights to the loss function is *not*
|
||||
# # the correct way of using L2 regularization/weight decay with Adam,
|
||||
# # since that will interact with the m and v parameters in strange ways.
|
||||
# #
|
||||
# # Instead we want to decay the weights in a manner that doesn't interact
|
||||
# # with the m/v parameters. This is equivalent to adding the square
|
||||
# # of the weights to the loss with plain (non-momentum) SGD.
|
||||
# if group['weight_decay'] > 0.0:
|
||||
# update += group['weight_decay'] * p.data
|
||||
|
||||
lr_scheduled = group['lr']
|
||||
lr_scheduled *= group['schedule'].get_lr(state['step'])
|
||||
# lr_scheduled = group['lr']
|
||||
# lr_scheduled *= group['schedule'].get_lr(state['step'])
|
||||
|
||||
update_with_lr = lr_scheduled * update
|
||||
p.data.add_(-update_with_lr)
|
||||
# update_with_lr = lr_scheduled * update
|
||||
# p.data.add_(-update_with_lr)
|
||||
|
||||
state['step'] += 1
|
||||
# state['step'] += 1
|
||||
|
||||
# step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1
|
||||
# No bias correction
|
||||
# bias_correction1 = 1 - beta1 ** state['step']
|
||||
# bias_correction2 = 1 - beta2 ** state['step']
|
||||
# # step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1
|
||||
# # No bias correction
|
||||
# # bias_correction1 = 1 - beta1 ** state['step']
|
||||
# # bias_correction2 = 1 - beta2 ** state['step']
|
||||
|
||||
return loss
|
||||
# return loss
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
Modified by wassname from the below:
|
||||
|
||||
Changes:
|
||||
- [ ] clamp weight with epsilon for stability
|
||||
- from `p.data *= (p.data>=0)`
|
||||
- to `p.data = torch.clamp(p.data, min=eps)`
|
||||
- [ ] try log t
|
||||
- [ ] try not taking the mean as much in the intensity layer
|
||||
- [ ] use pytorch lightning
|
||||
|
||||
@@ -38,76 +38,76 @@ def get_parser():
|
||||
parser.add_argument("--mean_first", action="store_true", help="in model take mean first")
|
||||
return parser
|
||||
|
||||
if __name__ == "__main__":
|
||||
# if __name__ == "__main__":
|
||||
|
||||
parser = get_parser()
|
||||
# parser = get_parser()
|
||||
|
||||
|
||||
config = parser.parse_args()
|
||||
# config = parser.parse_args()
|
||||
|
||||
path = 'data/'
|
||||
# path = 'data/'
|
||||
|
||||
if config.data == 'exponential_hawkes':
|
||||
# if config.data == 'exponential_hawkes':
|
||||
|
||||
train_data = read_timeseries(path + config.data + '_training.csv')
|
||||
val_data = read_timeseries(path + config.data + '_validation.csv')
|
||||
test_data = read_timeseries(path + config.data + '_testing.csv')
|
||||
else:
|
||||
raise NotImplemented('only exponential_hawkes')
|
||||
# train_data = read_timeseries(path + config.data + '_training.csv')
|
||||
# val_data = read_timeseries(path + config.data + '_validation.csv')
|
||||
# test_data = read_timeseries(path + config.data + '_testing.csv')
|
||||
# else:
|
||||
# raise NotImplemented('only exponential_hawkes')
|
||||
|
||||
|
||||
|
||||
train_timeseq, train_eventseq = generate_sequence(train_data, config.seq_len, log_mode=config.log_mode)
|
||||
train_loader = DataLoader(torch.utils.data.TensorDataset(train_timeseq, train_eventseq), shuffle=True, batch_size=config.batch_size)
|
||||
val_timeseq, val_eventseq = generate_sequence(val_data, config.seq_len, log_mode=config.log_mode)
|
||||
val_loader = DataLoader(torch.utils.data.TensorDataset(val_timeseq, val_eventseq), shuffle=False, batch_size=len(val_data))
|
||||
# train_timeseq, train_eventseq = generate_sequence(train_data, config.seq_len, log_mode=config.log_mode)
|
||||
# train_loader = DataLoader(torch.utils.data.TensorDataset(train_timeseq, train_eventseq), shuffle=True, batch_size=config.batch_size)
|
||||
# val_timeseq, val_eventseq = generate_sequence(val_data, config.seq_len, log_mode=config.log_mode)
|
||||
# val_loader = DataLoader(torch.utils.data.TensorDataset(val_timeseq, val_eventseq), shuffle=False, batch_size=len(val_data))
|
||||
|
||||
model = GTPP(config)
|
||||
# model = GTPP(config)
|
||||
|
||||
best_loss = 1e3
|
||||
patients = 0
|
||||
tol = 30
|
||||
# best_loss = 1e3
|
||||
# patients = 0
|
||||
# tol = 30
|
||||
|
||||
for epoch in range(config.epochs):
|
||||
# for epoch in range(config.epochs):
|
||||
|
||||
model.train()
|
||||
# model.train()
|
||||
|
||||
loss1 = loss2 = loss3 = 0
|
||||
# loss1 = loss2 = loss3 = 0
|
||||
|
||||
for batch in train_loader:
|
||||
loss, log_lmbda, int_lmbda, lmbda = model.train_batch(batch)
|
||||
# for batch in train_loader:
|
||||
# loss, log_lmbda, int_lmbda, lmbda = model.train_batch(batch)
|
||||
|
||||
loss1 += loss
|
||||
loss2 += log_lmbda
|
||||
loss3 += int_lmbda
|
||||
# loss1 += loss
|
||||
# loss2 += log_lmbda
|
||||
# loss3 += int_lmbda
|
||||
|
||||
|
||||
model.eval()
|
||||
# model.eval()
|
||||
|
||||
for batch in val_loader:
|
||||
val_loss, val_log_lmbda, val_int_lmbda, _ = model(batch)
|
||||
# for batch in val_loader:
|
||||
# val_loss, val_log_lmbda, val_int_lmbda, _ = model(batch)
|
||||
|
||||
if best_loss > val_loss:
|
||||
best_loss = val_loss.item()
|
||||
else:
|
||||
patients += 1
|
||||
if patients >= tol:
|
||||
print("Early Stop")
|
||||
print("epoch", epoch)
|
||||
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
break
|
||||
# if best_loss > val_loss:
|
||||
# best_loss = val_loss.item()
|
||||
# else:
|
||||
# patients += 1
|
||||
# if patients >= tol:
|
||||
# print("Early Stop")
|
||||
# print("epoch", epoch)
|
||||
# plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
# break
|
||||
|
||||
if epoch % config.prt_evry == 0:
|
||||
print("Epochs:{}".format(epoch))
|
||||
print("Training Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(loss1/train_timeseq.size(0), -loss2 / train_timeseq.size(0), loss3 / train_timeseq.size(0)))
|
||||
print("Validation Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(val_loss / val_timeseq.size(0),
|
||||
-val_log_lmbda / val_timeseq.size(0),
|
||||
val_int_lmbda/val_timeseq.size(0)))
|
||||
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
# plt_lmbda(test_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
# if epoch % config.prt_evry == 0:
|
||||
# print("Epochs:{}".format(epoch))
|
||||
# print("Training Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(loss1/train_timeseq.size(0), -loss2 / train_timeseq.size(0), loss3 / train_timeseq.size(0)))
|
||||
# print("Validation Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(val_loss / val_timeseq.size(0),
|
||||
# -val_log_lmbda / val_timeseq.size(0),
|
||||
# val_int_lmbda/val_timeseq.size(0)))
|
||||
# plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
# # plt_lmbda(test_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
|
||||
|
||||
|
||||
print("end")
|
||||
# print("end")
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ def generate_sequence(timeseries, seq_len, log_mode=False):
|
||||
|
||||
|
||||
|
||||
def plt_lmbda(timeseries, model, seq_len, log_mode=False, dt=0.01, lmbda0=0.2, alpha=0.8, beta=1.0):
|
||||
def plt_lmbda(timeseries, model, seq_len, log_mode=False, dt=0.01, lmbda0=0., alpha=0.01, beta=1.0):
|
||||
|
||||
lmbda_dict = dict()
|
||||
pred_dict = dict()
|
||||
@@ -71,6 +71,6 @@ def plt_lmbda(timeseries, model, seq_len, log_mode=False, dt=0.01, lmbda0=0.2, a
|
||||
|
||||
plt.plot(t_span, lmbda_dict[0], color='green', label='true prob')
|
||||
plt.plot([t for t, e in timeseries][seq_len-1:], np.array(pred_dict[0].detach()), color='olive', label='pred prob')
|
||||
plt.scatter([t for t, e in timeseries], [-1 for _ in timeseries], color='blue', label='events')
|
||||
plt.scatter([t for t, e in timeseries], [-.01 for _ in timeseries], color='blue', label='events')
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
Reference in New Issue
Block a user