This commit is contained in:
wassname
2022-02-11 15:36:01 +08:00
parent c0cfe2186e
commit e8ab8fc1f4
8 changed files with 1422 additions and 24 deletions
+151
View File
@@ -0,0 +1,151 @@
# Created by https://www.gitignore.io/api/linux,python,windows,jupyternotebook
### JupyterNotebook ###
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# Remove previous ipynb_checkpoints
# git rm -r .ipynb_checkpoints/
#
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.gitignore.io/api/linux,python,windows,jupyternotebook
.demo_cache.sqlite
demo_cache.sqlite
File diff suppressed because one or more lines are too long
+533
View File
File diff suppressed because one or more lines are too long
+153
View File
@@ -0,0 +1,153 @@
# %reload_ext autoreload
# %autoreload 2
import matplotlib.pyplot as plt
# %matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (12.0, 3)
import numpy as np
import tqdm
import torch
from argparse import ArgumentParser
from torch.utils.data import DataLoader
from utils import read_timeseries,generate_sequence, plt_lmbda
from module import GTPP
from run import get_parser
# +
# Build the run configuration from the parser's defaults: parsing an empty
# argument list means sys.argv is not consulted.
parser = get_parser()
config = parser.parse_args([])

# Directory prefix for the `<data>_training/_validation/_testing.csv` files.
path = 'data/'

# Only the 'exponential_hawkes' dataset is supported; any other value of
# config.data is rejected explicitly.
if config.data == 'exponential_hawkes':
    train_data = read_timeseries(path + config.data + '_training.csv')
    val_data = read_timeseries(path + config.data + '_validation.csv')
    test_data = read_timeseries(path + config.data + '_testing.csv')
else:
    # `NotImplemented` is the rich-comparison sentinel and is not callable;
    # the exception class for unsupported options is NotImplementedError.
    raise NotImplementedError('only exponential_hawkes')
# Training split: turn the raw series into (time, event) tensors, then serve
# them reshuffled in mini-batches of config.batch_size.
train_timeseq, train_eventseq = generate_sequence(
    train_data, config.seq_len, log_mode=config.log_mode
)
train_dataset = torch.utils.data.TensorDataset(train_timeseq, train_eventseq)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

# Validation split: same preprocessing, fixed order.
# NOTE(review): the batch size is len(val_data), not val_timeseq.size(0) —
# confirm this yields the single full-validation batch it appears to aim for.
val_timeseq, val_eventseq = generate_sequence(
    val_data, config.seq_len, log_mode=config.log_mode
)
val_dataset = torch.utils.data.TensorDataset(val_timeseq, val_eventseq)
val_loader = DataLoader(val_dataset, batch_size=len(val_data), shuffle=False)

model = GTPP(config)

# State for the validation-loss check in the training loop: lowest loss seen
# so far, count of non-improving checks, and the count that triggers a stop.
best_loss, patients, tol = 1e3, 0, 333
# Training driver: per epoch, run model.train_batch over train_loader, evaluate
# on val_loader, apply a patience-style stop check, and periodically log/plot.
# NOTE(review): leading indentation is missing in this copy, so the nesting of
# the statements below must be confirmed against the original file.
for epoch in range(config.epochs):
model.train()
# Running totals for the epoch; the print below labels them as negative
# log-likelihood, log lambda, and integral lambda respectively.
loss1 = loss2 = loss3 = 0
for batch in train_loader:
loss, log_lmbda, int_lmbda, lmbda = model.train_batch(batch)
loss1 += loss
loss2 += log_lmbda
loss3 += int_lmbda
model.eval()
# Only the values from the last validation batch survive this loop.
for batch in val_loader:
val_loss, val_log_lmbda, val_int_lmbda, _ = model(batch)
# Remember the lowest validation loss; otherwise count a non-improving check.
# NOTE(review): `patients` is never reset when the loss improves, so it is a
# cumulative count rather than a consecutive one — confirm this is intended.
if best_loss > val_loss:
best_loss = val_loss.item()
else:
patients += 1
if patients >= tol:
print("Early Stop")
print("epoch", epoch)
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
break
# Report every config.prt_evry epochs; totals are divided by the number of
# sequences in the corresponding split.
if epoch % config.prt_evry == 0:
print("Epochs:{}".format(epoch))
print("Training : Negative Log Likelihood:{:2.6f} Log Lambda:{:2.6f}: Integral Lambda:{:2.6f}".format(loss1/train_timeseq.size(0), -loss2 / train_timeseq.size(0), loss3 / train_timeseq.size(0)))
print("Validation: Negative Log Likelihood:{:2.6f} Log Lambda:{:2.6f}: Integral Lambda:{:2.6f}".format(val_loss / val_timeseq.size(0),
-val_log_lmbda / val_timeseq.size(0),
val_int_lmbda/val_timeseq.size(0)))
# NOTE(review): cannot tell from this copy whether this plot belongs to the
# periodic-report branch or runs every epoch — confirm.
plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
# Final plot on the first test series.
plt_lmbda(test_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
print("end")
# -
# +
# class CryptoTraderPL_NLL(pl.LightningModule):
# def __init__(self, config):
# super().__init__()
# self.config = config
# self._model = GTPP(config)
# def forward(self, x):
# return self._model(x)
# def training_step(self, batch, batch_idx, phase='train'):
# """
# Training step which runs for N steps, and get loss over all of them
# """
# x, l, r = batch
# y_pred = self._model(x)
# # we have multiple targets. So move them to batch
# l2 = l.reshape(-1)
# y_pred2 = y_pred.reshape((*l2.shape, 3))
# loss = F.nll_loss(y_pred2, l2)
# # record weights
# self.log_dict({
# f'loss/{phase}': loss,
# }, prog_bar=True)
# assert torch.isfinite(loss)
# return loss
# def validation_step(self, batch, batch_idx):
# return self.training_step(batch, batch_idx, phase='val')
# def predict_step(self, batch, batch_idx):
# x, y, r = batch
# y_pred = self.forward(x)
# return y_pred, y, r
# def configure_optimizers(self):
# optim = Ranger21(self.parameters(),
# lr=self.train_kwargs['lr'],
# num_epochs=num_epochs,
# num_batches_per_epoch=num_batches_per_epoch,
# weight_decay=self.train_kwargs['weight_decay'])
# return {'optimizer': optim, 'monitor': 'loss/val'}
# -
+22 -8
View File
@@ -18,26 +18,40 @@ class IntensityNet(nn.Module):
self.module_list = nn.ModuleList([nn.Linear(in_features=config.mlp_dim, out_features=config.mlp_dim) for _ in range(config.mlp_layer-1)])
self.linear3 = nn.Sequential(nn.Linear(in_features=config.mlp_dim, out_features=1), nn.Softplus())
self.mean_first = config.mean_first
self.log_t = config.log_t
def forward(self, hidden_state, target_time):
# Computes the integrated intensity from the last hidden state and the target
# time, differentiates it w.r.t. the time input to get the intensity, and
# returns [nll, mean log-intensity, mean integrated intensity, intensity].
# NOTE(review): this span reads like a rendered diff with removed and added
# lines interleaved (e.g. both the F.tanh and torch.tanh variants, two `return`
# statements) and with indentation stripped — confirm against the real file.
# Small constant added before taking logarithms.
eps = 1e-10
# Zero out every negative parameter entry in place on each call.
for p in self.parameters():
p.data *= (p.data>=0)
# Needed so the integrated intensity can be differentiated w.r.t. time.
target_time.requires_grad_(True)
# Optionally feed log(t + eps) instead of t.
# NOTE(review): target_time is rebound here, so the grad call below
# differentiates w.r.t. the log-time tensor, not the raw time — confirm intent.
if self.log_t:
target_time = torch.log(target_time+eps)
t = self.linear1(target_time.unsqueeze(dim=-1))
# Combine the last step of hidden_state with the time embedding.
out = F.tanh(self.linear2(torch.cat([hidden_state[:,-1,:], t], dim=-1)))
out = torch.tanh(self.linear2(torch.cat([hidden_state[:,-1,:], t], dim=-1)))
for layer in self.module_list:
out = F.tanh(layer(out))
out = torch.tanh(layer(out))
# linear3 already ends in nn.Softplus, so softplus is applied twice here.
int_lmbda = F.softplus(self.linear3(out))
int_lmbda = torch.mean(int_lmbda)
int_lmbda_mean = int_lmbda.mean()
lmbda = grad(int_lmbda, target_time, create_graph=True, retain_graph=True)[0]
nll = torch.add(int_lmbda, -torch.mean(torch.log((lmbda+1e-10))))
# Gradient of the batch-mean integrated intensity w.r.t. target_time;
# create_graph=True keeps it differentiable for the loss.
lmbda = grad(
int_lmbda.mean(),
target_time,
create_graph=True, retain_graph=True)[0]
log_lmbda = (lmbda + eps).log()
log_lmbda_mean = log_lmbda.mean()
return [nll, torch.mean(torch.log((lmbda+1e-10))), int_lmbda, lmbda]
# mean_first: subtract the two batch means; otherwise subtract element-wise and
# then average.
# NOTE(review): if int_lmbda is (batch, 1) and log_lmbda is (batch,), the
# element-wise branch broadcasts to (batch, batch) — TODO confirm shapes.
if self.mean_first:
nll = int_lmbda_mean - log_lmbda_mean
else:
nll = (int_lmbda - log_lmbda).mean()
return [nll, log_lmbda_mean, int_lmbda_mean, lmbda]
class GTPP(nn.Module):
@@ -47,7 +61,7 @@ class GTPP(nn.Module):
self.batch_size = config.batch_size
self.lr = config.lr
self.log_mode = config.log_mode
self.log_mode = config.log_mode # TODO mean to be used here?
self.embedding = nn.Embedding(num_embeddings=config.event_class, embedding_dim=config.emb_dim)
@@ -80,7 +94,7 @@ class GTPP(nn.Module):
nll, log_lmbda, int_lmbda, lmbda = self.intensity_net(hidden_state, time_seq[:, -1])
return [nll, log_lmbda, int_lmbda, lmbda]
return [nll, log_lmbda.detach(), int_lmbda.detach(), lmbda.detach()]
def train_batch(self, batch):
+8
View File
@@ -1,3 +1,11 @@
Modified by wassname from the below:
Changes:
- [ ] try log t
- [ ] try averaging less in the intensity layer
- [ ] use pytorch lightning
# Fully Neural Network based Model for General Temporal Point Processes (NeurIPS 2019, Takahiro Omi)
This code is a PyTorch implementation of the Neural Temporal Point Process.
+17 -10
View File
@@ -11,13 +11,10 @@ from torch.utils.data import DataLoader
from utils import read_timeseries,generate_sequence, plt_lmbda
from module import GTPP
if __name__ == "__main__":
def get_parser():
parser = ArgumentParser()
parser.add_argument("--data", type=str, default='exponential_hawkes')
parser.add_argument("--model", type=str, default='GTPP')
# parser.add_argument("--model", type=str, default='GTPP')
parser.add_argument("--seq_len", type=int, default=20)
parser.add_argument("--emb_dim", type=int, default=10)
parser.add_argument("--hid_dim", type=int, default=64)
@@ -29,13 +26,21 @@ if __name__ == "__main__":
parser.add_argument("--epochs", type=float, default=100)
parser.add_argument("--lr", type=float, default=1e-3)
parser.add_argument("--dropout", type=float, default=0.1)
parser.add_argument("--prt_evry", type=int, default=5)
parser.add_argument("--early_stop", type=bool, default=True)
parser.add_argument("--prt_evry", type=int, default=15)
# parser.add_argument("--early_stop", type=bool, default=True) # on by default
## Alpha ??
parser.add_argument("--alpha", type=float, default=0.05)
parser.add_argument("--alpha", type=float, default=0.05, help='future discount factor for display true event probability')
parser.add_argument("--importance_weight", action="store_true")
parser.add_argument("--log_mode", type=bool, default=False)
# parser.add_argument("--importance_weight", action="store_true") # not used
parser.add_argument("--log_mode", type=bool, default=False, help="generate sequence in log mode")
parser.add_argument("--log_t", action="store_true", help="use log of time in model inputs")
parser.add_argument("--mean_first", action="store_true", help="in model take mean first")
return parser
if __name__ == "__main__":
parser = get_parser()
config = parser.parse_args()
@@ -47,6 +52,8 @@ if __name__ == "__main__":
train_data = read_timeseries(path + config.data + '_training.csv')
val_data = read_timeseries(path + config.data + '_validation.csv')
test_data = read_timeseries(path + config.data + '_testing.csv')
else:
raise NotImplemented('only exponential_hawkes')
+5 -6
View File
@@ -56,10 +56,8 @@ def plt_lmbda(timeseries, model, seq_len, log_mode=False, dt=0.01, lmbda0=0.2, a
# exponential_hawkes : lmbda0, alpha, beta: 0.2, 0.8, 1.0
# lmbda = lambda0 + alpha*sum(exp{-beta*(t-t_i)})
lmbda_dict[0] = np.zeros(t_span.shape)
for t, e in timeseries:
target = (t_span > t)
lmbda_dict[0][target] += alpha*np.exp(-beta*(t_span[target]-t))
@@ -71,7 +69,8 @@ def plt_lmbda(timeseries, model, seq_len, log_mode=False, dt=0.01, lmbda0=0.2, a
_, _, _, pred_dict[0] = model((test_timeseq, test_eventseq))
plt.plot(t_span, lmbda_dict[0], color='green')
plt.plot([t for t, e in timeseries][seq_len-1:], np.array(pred_dict[0].detach()), color='olive')
plt.scatter([t for t, e in timeseries], [-1 for _ in timeseries], color='blue')
plt.show()
plt.plot(t_span, lmbda_dict[0], color='green', label='true prob')
plt.plot([t for t, e in timeseries][seq_len-1:], np.array(pred_dict[0].detach()), color='olive', label='pred prob')
plt.scatter([t for t, e in timeseries], [-1 for _ in timeseries], color='blue', label='events')
plt.legend()
plt.show()