This commit is contained in:
wassname
2022-02-11 20:07:18 +08:00
parent 399896bd52
commit 7742b48f69
4 changed files with 4 additions and 944 deletions
File diff suppressed because one or more lines are too long
+4 -33
View File
@@ -3,7 +3,6 @@ from torch import nn
from torch.autograd import grad
from torch.optim import Adam
from torch.nn import functional as F
# from optimization import BertAdam
from matplotlib import pyplot as plt
@@ -61,7 +60,7 @@ class IntensityNet(nn.Module):
return [nll, log_lmbda_mean, int_lmbda_mean, lmbda]
LEAK=1
class GTPP(nn.Module):
def __init__(self, config):
@@ -70,27 +69,16 @@ class GTPP(nn.Module):
self.batch_size = config.batch_size
self.lr = config.lr
self.log_mode = config.log_mode # TODO mean to be used here?
self.log_mode = config.log_mode # TODO meant to be used here?
self.embedding = nn.Embedding(num_embeddings=config.event_class, embedding_dim=config.emb_dim)
self.emb_drop = nn.Dropout(p=config.dropout)
self.lstm = nn.LSTM(input_size=LEAK+config.emb_dim,
self.lstm = nn.LSTM(input_size=1+config.emb_dim,
hidden_size=config.hid_dim,
batch_first=True,
bidirectional=False)
self.intensity_net = IntensityNet(config)
# self.set_optimizer(total_step=1)
# def set_optimizer(self, total_step, use_bert=False):
# if use_bert:
# self.set_optimizer = BertAdam(params=self.parameters(),
# lr=self.lr,
# warmup=0.1,
# t_total=total_step)
# else:
# self.set_optimizer = Adam(self.parameters(), lr=self.lr)
def forward(self, batch):
@@ -98,27 +86,10 @@ class GTPP(nn.Module):
event_seq = event_seq.long()
emb = self.embedding(event_seq)
emb = self.emb_drop(emb)
if LEAK:
lstm_input = torch.cat([emb[:, :-1], time_seq[:, :-1].unsqueeze(-1)], dim=-1)
else:
lstm_input = emb
lstm_input = torch.cat([emb[:, :-1], time_seq[:, :-1].unsqueeze(-1)], dim=-1)
hidden_state, _ = self.lstm(lstm_input)
# FIXME wait we pass the target time into the LSTM. Is this data leakage?
nll, log_lmbda, int_lmbda, lmbda = self.intensity_net(hidden_state, time_seq[:, -1])
return [nll, log_lmbda.detach(), int_lmbda.detach(), lmbda.detach()]
# def train_batch(self, batch):
# self.set_optimizer.zero_grad()
# nll, log_lmbda, int_lmbda, lmbda = self.forward(batch)
# loss = nll
# loss.backward()
# self.set_optimizer.step()
# return nll.item(), log_lmbda.item(), int_lmbda.item(), lmbda
-304
View File
@@ -1,304 +0,0 @@
# # coding=utf-8
# # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# """PyTorch optimization for BERT model."""
# import math
# import torch
# from torch.optim import Optimizer
# from torch.optim.optimizer import required
# from torch.nn.utils import clip_grad_norm_
# import logging
# import abc
# import sys
# logger = logging.getLogger(__name__)
# if sys.version_info >= (3, 4):
# ABC = abc.ABC
# else:
# ABC = abc.ABCMeta('ABC', (), {})
# class _LRSchedule(ABC):
# """ Parent of all LRSchedules here. """
# warn_t_total = False # is set to True for schedules where progressing beyond t_total steps doesn't make sense
# def __init__(self, warmup=0.002, t_total=-1, **kw):
# """
# :param warmup: what fraction of t_total steps will be used for linear warmup
# :param t_total: how many training steps (updates) are planned
# :param kw:
# """
# super(_LRSchedule, self).__init__(**kw)
# if t_total < 0:
# logger.warning("t_total value of {} results in schedule not being applied".format(t_total))
# if not 0.0 <= warmup < 1.0 and not warmup == -1:
# raise ValueError("Invalid warmup: {} - should be in [0.0, 1.0[ or -1".format(warmup))
# warmup = max(warmup, 0.)
# self.warmup, self.t_total = float(warmup), float(t_total)
# self.warned_for_t_total_at_progress = -1
# def get_lr(self, step, nowarn=False):
# """
# :param step: which of t_total steps we're on
# :param nowarn: set to True to suppress warning regarding training beyond specified 't_total' steps
# :return: learning rate multiplier for current update
# """
# if self.t_total < 0:
# return 1.
# progress = float(step) / self.t_total
# ret = self.get_lr_(progress)
# # warning for exceeding t_total (only active with warmup_linear)
# if not nowarn and self.warn_t_total and progress > 1. and progress > self.warned_for_t_total_at_progress:
# logger.warning(
# "Training beyond specified 't_total'. Learning rate multiplier set to {}. Please set 't_total' of {} correctly."
# .format(ret, self.__class__.__name__))
# self.warned_for_t_total_at_progress = progress
# # end warning
# return ret
# @abc.abstractmethod
# def get_lr_(self, progress):
# """
# :param progress: value between 0 and 1 (unless going beyond t_total steps) specifying training progress
# :return: learning rate multiplier for current update
# """
# return 1.
# class ConstantLR(_LRSchedule):
# def get_lr_(self, progress):
# return 1.
# class WarmupCosineSchedule(_LRSchedule):
# """
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
# Decreases learning rate from 1. to 0. over remaining `1 - warmup` steps following a cosine curve.
# If `cycles` (default=0.5) is different from default, learning rate follows cosine function after warmup.
# """
# warn_t_total = True
# def __init__(self, warmup=0.002, t_total=-1, cycles=.5, **kw):
# """
# :param warmup: see LRSchedule
# :param t_total: see LRSchedule
# :param cycles: number of cycles. Default: 0.5, corresponding to cosine decay from 1. at progress==warmup and 0 at progress==1.
# :param kw:
# """
# super(WarmupCosineSchedule, self).__init__(warmup=warmup, t_total=t_total, **kw)
# self.cycles = cycles
# def get_lr_(self, progress):
# if progress < self.warmup:
# return progress / self.warmup
# else:
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
# return 0.5 * (1. + math.cos(math.pi * self.cycles * 2 * progress))
# class WarmupCosineWithHardRestartsSchedule(WarmupCosineSchedule):
# """
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
# If `cycles` (default=1.) is different from default, learning rate follows `cycles` times a cosine decaying
# learning rate (with hard restarts).
# """
# def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
# super(WarmupCosineWithHardRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
# assert(cycles >= 1.)
# def get_lr_(self, progress):
# if progress < self.warmup:
# return progress / self.warmup
# else:
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
# ret = 0.5 * (1. + math.cos(math.pi * ((self.cycles * progress) % 1)))
# return ret
# class WarmupCosineWithWarmupRestartsSchedule(WarmupCosineWithHardRestartsSchedule):
# """
# All training progress is divided in `cycles` (default=1.) parts of equal length.
# Every part follows a schedule with the first `warmup` fraction of the training steps linearly increasing from 0. to 1.,
# followed by a learning rate decreasing from 1. to 0. following a cosine curve.
# """
# def __init__(self, warmup=0.002, t_total=-1, cycles=1., **kw):
# assert(warmup * cycles < 1.)
# warmup = warmup * cycles if warmup >= 0 else warmup
# super(WarmupCosineWithWarmupRestartsSchedule, self).__init__(warmup=warmup, t_total=t_total, cycles=cycles, **kw)
# def get_lr_(self, progress):
# progress = progress * self.cycles % 1.
# if progress < self.warmup:
# return progress / self.warmup
# else:
# progress = (progress - self.warmup) / (1 - self.warmup) # progress after warmup
# ret = 0.5 * (1. + math.cos(math.pi * progress))
# return ret
# class WarmupConstantSchedule(_LRSchedule):
# """
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
# Keeps learning rate equal to 1. after warmup.
# """
# def get_lr_(self, progress):
# if progress < self.warmup:
# return progress / self.warmup
# return 1.
# class WarmupLinearSchedule(_LRSchedule):
# """
# Linearly increases learning rate from 0 to 1 over `warmup` fraction of training steps.
# Linearly decreases learning rate from 1. to 0. over remaining `1 - warmup` steps.
# """
# warn_t_total = True
# def get_lr_(self, progress):
# if progress < self.warmup:
# return progress / self.warmup
# return max((progress - 1.) / (self.warmup - 1.), 0.)
# SCHEDULES = {
# None: ConstantLR,
# "none": ConstantLR,
# "warmup_cosine": WarmupCosineSchedule,
# "warmup_constant": WarmupConstantSchedule,
# "warmup_linear": WarmupLinearSchedule
# }
# class BertAdam(Optimizer):
# """Implements BERT version of Adam algorithm with weight decay fix.
# Params:
# lr: learning rate
# warmup: portion of t_total for the warmup, -1 means no warmup. Default: -1
# t_total: total number of training steps for the learning
# rate schedule, -1 means constant learning rate of 1. (no warmup regardless of warmup setting). Default: -1
# schedule: schedule to use for the warmup (see above).
# Can be `'warmup_linear'`, `'warmup_constant'`, `'warmup_cosine'`, `'none'`, `None` or a `_LRSchedule` object (see below).
# If `None` or `'none'`, learning rate is always kept constant.
# Default : `'warmup_linear'`
# b1: Adam's b1 (beta1). Default: 0.9
# b2: Adam's b2 (beta2). Default: 0.999
# e: Adams epsilon. Default: 1e-6
# weight_decay: Weight decay. Default: 0.01
# max_grad_norm: Maximum norm for the gradients (-1 means no clipping). Default: 1.0
# """
# def __init__(self, params, lr=required, warmup=-1, t_total=-1, schedule='warmup_linear',
# b1=0.9, b2=0.999, e=1e-6, weight_decay=0.01, max_grad_norm=1.0, **kwargs):
# if lr is not required and lr < 0.0:
# raise ValueError("Invalid learning rate: {} - should be >= 0.0".format(lr))
# if not isinstance(schedule, _LRSchedule) and schedule not in SCHEDULES:
# raise ValueError("Invalid schedule parameter: {}".format(schedule))
# if not 0.0 <= b1 < 1.0:
# raise ValueError("Invalid b1 parameter: {} - should be in [0.0, 1.0[".format(b1))
# if not 0.0 <= b2 < 1.0:
# raise ValueError("Invalid b2 parameter: {} - should be in [0.0, 1.0[".format(b2))
# if not e >= 0.0:
# raise ValueError("Invalid epsilon value: {} - should be >= 0.0".format(e))
# # initialize schedule object
# if not isinstance(schedule, _LRSchedule):
# schedule_type = SCHEDULES[schedule]
# schedule = schedule_type(warmup=warmup, t_total=t_total)
# else:
# if warmup != -1 or t_total != -1:
# logger.warning("warmup and t_total on the optimizer are ineffective when _LRSchedule object is provided as schedule. "
# "Please specify custom warmup and t_total in _LRSchedule object.")
# defaults = dict(lr=lr, schedule=schedule,
# b1=b1, b2=b2, e=e, weight_decay=weight_decay,
# max_grad_norm=max_grad_norm)
# super(BertAdam, self).__init__(params, defaults)
# def get_lr(self):
# lr = []
# for group in self.param_groups:
# for p in group['params']:
# state = self.state[p]
# if len(state) == 0:
# return [0]
# lr_scheduled = group['lr']
# lr_scheduled *= group['schedule'].get_lr(state['step'])
# lr.append(lr_scheduled)
# return lr
# def step(self, closure=None):
# """Performs a single optimization step.
# Arguments:
# closure (callable, optional): A closure that reevaluates the model
# and returns the loss.
# """
# loss = None
# if closure is not None:
# loss = closure()
# for group in self.param_groups:
# for p in group['params']:
# if p.grad is None:
# continue
# grad = p.grad.data
# if grad.is_sparse:
# raise RuntimeError('Adam does not support sparse gradients, please consider SparseAdam instead')
# state = self.state[p]
# # State initialization
# if len(state) == 0:
# state['step'] = 0
# # Exponential moving average of gradient values
# state['next_m'] = torch.zeros_like(p.data)
# # Exponential moving average of squared gradient values
# state['next_v'] = torch.zeros_like(p.data)
# next_m, next_v = state['next_m'], state['next_v']
# beta1, beta2 = group['b1'], group['b2']
# # Add grad clipping
# if group['max_grad_norm'] > 0:
# clip_grad_norm_(p, group['max_grad_norm'])
# # Decay the first and second moment running average coefficient
# # In-place operations to update the averages at the same time
# next_m.mul_(beta1).add_(1 - beta1, grad)
# next_v.mul_(beta2).addcmul_(1 - beta2, grad, grad)
# update = next_m / (next_v.sqrt() + group['e'])
# # Just adding the square of the weights to the loss function is *not*
# # the correct way of using L2 regularization/weight decay with Adam,
# # since that will interact with the m and v parameters in strange ways.
# #
# # Instead we want to decay the weights in a manner that doesn't interact
# # with the m/v parameters. This is equivalent to adding the square
# # of the weights to the loss with plain (non-momentum) SGD.
# if group['weight_decay'] > 0.0:
# update += group['weight_decay'] * p.data
# lr_scheduled = group['lr']
# lr_scheduled *= group['schedule'].get_lr(state['step'])
# update_with_lr = lr_scheduled * update
# p.data.add_(-update_with_lr)
# state['step'] += 1
# # step_size = lr_scheduled * math.sqrt(bias_correction2) / bias_correction1
# # No bias correction
# # bias_correction1 = 1 - beta1 ** state['step']
# # bias_correction2 = 1 - beta2 ** state['step']
# return loss
-74
View File
@@ -38,80 +38,6 @@ def get_parser():
parser.add_argument("--mean_first", action="store_true", help="in model take mean first")
return parser
# if __name__ == "__main__":
# parser = get_parser()
# config = parser.parse_args()
# path = 'data/'
# if config.data == 'exponential_hawkes':
# train_data = read_timeseries(path + config.data + '_training.csv')
# val_data = read_timeseries(path + config.data + '_validation.csv')
# test_data = read_timeseries(path + config.data + '_testing.csv')
# else:
# raise NotImplemented('only exponential_hawkes')
# train_timeseq, train_eventseq = generate_sequence(train_data, config.seq_len, log_mode=config.log_mode)
# train_loader = DataLoader(torch.utils.data.TensorDataset(train_timeseq, train_eventseq), shuffle=True, batch_size=config.batch_size)
# val_timeseq, val_eventseq = generate_sequence(val_data, config.seq_len, log_mode=config.log_mode)
# val_loader = DataLoader(torch.utils.data.TensorDataset(val_timeseq, val_eventseq), shuffle=False, batch_size=len(val_data))
# model = GTPP(config)
# best_loss = 1e3
# patients = 0
# tol = 30
# for epoch in range(config.epochs):
# model.train()
# loss1 = loss2 = loss3 = 0
# for batch in train_loader:
# loss, log_lmbda, int_lmbda, lmbda = model.train_batch(batch)
# loss1 += loss
# loss2 += log_lmbda
# loss3 += int_lmbda
# model.eval()
# for batch in val_loader:
# val_loss, val_log_lmbda, val_int_lmbda, _ = model(batch)
# if best_loss > val_loss:
# best_loss = val_loss.item()
# else:
# patients += 1
# if patients >= tol:
# print("Early Stop")
# print("epoch", epoch)
# plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
# break
# if epoch % config.prt_evry == 0:
# print("Epochs:{}".format(epoch))
# print("Training Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(loss1/train_timeseq.size(0), -loss2 / train_timeseq.size(0), loss3 / train_timeseq.size(0)))
# print("Validation Negative Log Likelihood:{} Log Lambda:{}: Integral Lambda:{}".format(val_loss / val_timeseq.size(0),
# -val_log_lmbda / val_timeseq.size(0),
# val_int_lmbda/val_timeseq.size(0)))
# plt_lmbda(train_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
# # plt_lmbda(test_data[0], model=model, seq_len=config.seq_len, log_mode=config.log_mode)
# print("end")