Initial commit

This commit is contained in:
erikwijmans
2017-12-26 18:43:17 -05:00
commit dc4e2b0db3
42 changed files with 3486 additions and 0 deletions
+4
View File
@@ -0,0 +1,4 @@
__pycache__
*.pth*
.autoenv*
runs
+21
View File
@@ -0,0 +1,21 @@
=================
Pointnet2 PyTorch
=================
Partial implemention of `Pointnet2 <https://github.com/charlesq34/pointnet2>`_ written in `PyTorch <http://pytorch.org>`_.
The custom ops used by Pointnet2 are currently **ONLY** supported on the GPU using CUDA.
---------------------
Building CUDA kernels
---------------------
- ``cd utils``
- ``mkdir build && cd build``
- ``cmake .. && make``
------------------
Exampling training
------------------
Two training examples are provided by ``train_sem_seg.py`` and ``train_cls.py``. The datasets for both will be downloaded automatically by default
+2
View File
@@ -0,0 +1,2 @@
indoor3d_sem_seg_hdf5_data
modelnet40_ply_hdf5_2048
+101
View File
@@ -0,0 +1,101 @@
import torch
import torch.utils.data as data
import numpy as np
import os, sys, h5py, subprocess, shlex
def _get_data_files(list_filename):
return [line.rstrip() for line in open(list_filename)]
def _load_data_file(name):
f = h5py.File(name)
data = f['data'][:]
label = f['label'][:]
return data, label
class Indoor3DSemSeg(data.Dataset):
def __init__(self,
num_points,
root,
train=True,
download=True,
data_precent=1.0):
super().__init__()
self.data_precent = data_precent
root = os.path.abspath(root)
self.folder = "indoor3d_sem_seg_hdf5_data"
self.data_dir = os.path.join(root, self.folder)
self.url = "https://shapenet.cs.stanford.edu/media/indoor3d_sem_seg_hdf5_data.zip"
if download and not os.path.exists(self.data_dir):
zipfile = os.path.join(root, os.path.basename(self.url))
subprocess.check_call(
shlex.split("curl {} -o {}".format(self.url, zipfile)))
subprocess.check_call(shlex.split("unzip {} -d {}".format(zipfile, root)))
subprocess.check_call(shlex.split("rm {}".format(zipfile)))
self.train, self.num_points = train, num_points
all_files = _get_data_files(
os.path.join(self.data_dir, "all_files.txt"))
room_filelist = _get_data_files(
os.path.join(self.data_dir, "room_filelist.txt"))
data_batchlist, label_batchlist = [], []
for f in all_files:
d, l = _load_data_file(os.path.join(root, f))
data_batchlist.append(d)
label_batchlist.append(l)
data_batches = np.concatenate(data_batchlist, 0)
labels_batches = np.concatenate(label_batchlist, 0)
test_area = 'Area_5'
train_idxs, test_idxs = [], []
for i, room_name in enumerate(room_filelist):
if test_area in room_name:
test_idxs.append(i)
else:
train_idxs.append(i)
if self.train:
self.points = data_batches[train_idxs, ...]
self.labels = labels_batches[train_idxs, ...]
else:
self.points = data_batches[test_idxs, ...]
self.labels = labels_batches[test_idxs, ...]
def __getitem__(self, idx):
pt_idxs = np.arange(0, self.num_points)
np.random.shuffle(pt_idxs)
current_points = torch.from_numpy(self.points[idx, pt_idxs, :]).type(
torch.FloatTensor)
current_labels = torch.from_numpy(self.labels[idx, pt_idxs]).type(
torch.LongTensor)
return current_points, current_labels
def __len__(self):
return int(self.points.shape[0] * self.data_precent)
def set_num_points(self, pts):
self.num_points = pts
def randomize(self):
pass
if __name__ == "__main__":
dset = Indoor3DSemSeg(16, "./", train=True)
print(dset[0])
print(len(dset))
dloader = torch.utils.data.DataLoader(dset, batch_size=32, shuffle=True)
for i, data in enumerate(dloader, 0):
inputs, labels = data
if i == len(dloader) - 1:
print(inputs.size())
+106
View File
@@ -0,0 +1,106 @@
import torch
import torch.utils.data as data
import numpy as np
import os, sys, h5py, subprocess, shlex
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
def _get_data_files(list_filename):
return [line.rstrip()[5:] for line in open(list_filename)]
def _load_data_file(name):
f = h5py.File(name)
data = f['data'][:]
label = f['label'][:]
return data, label
class ModelNet40Cls(data.Dataset):
def __init__(self,
num_points,
root,
transforms=None,
train=True,
download=True):
super().__init__()
self.transforms = transforms
root = os.path.abspath(root)
self.folder = "modelnet40_ply_hdf5_2048"
self.data_dir = os.path.join(root, self.folder)
self.url = "https://shapenet.cs.stanford.edu/media/modelnet40_ply_hdf5_2048.zip"
if download and not os.path.exists(self.data_dir):
zipfile = os.path.join(root, os.path.basename(self.url))
subprocess.check_call(
shlex.split("curl {} -o {}".format(self.url, zipfile)))
subprocess.check_call(shlex.split("unzip {} -d {}".format(zipfile, root)))
subprocess.check_call(shlex.split("rm {}".format(zipfile)))
self.train, self.num_points = train, num_points
if self.train:
self.files = _get_data_files( \
os.path.join(self.data_dir, 'train_files.txt'))
else:
self.files = _get_data_files( \
os.path.join(self.data_dir, 'test_files.txt'))
point_list, label_list = [], []
for f in self.files:
points, labels = _load_data_file(os.path.join(root, f))
point_list.append(points)
label_list.append(labels)
self.points = np.concatenate(point_list, 0)
self.labels = np.concatenate(label_list, 0)
self.randomize()
def __getitem__(self, idx):
pt_idxs = np.arange(0, self.actual_number_of_points)
np.random.shuffle(pt_idxs)
current_points = self.points[idx, pt_idxs, :]
label = torch.from_numpy(self.labels[idx]).type(torch.LongTensor)
if self.transforms is not None:
current_points = self.transforms(current_points)
return current_points, label
def __len__(self):
return self.points.shape[0]
def set_num_points(self, pts):
self.num_points = pts
def randomize(self):
self.actual_number_of_points = min(
max(
np.random.randint(self.num_points * 0.8,
self.num_points * 1.2), 1),
self.points.shape[1])
if __name__ == "__main__":
from torchvision import transforms
import data_utils as d_utils
transforms = transforms.Compose([
d_utils.PointcloudToTensor(),
d_utils.PointcloudRotate(x_axis=True),
d_utils.PointcloudScale(),
d_utils.PointcloudTranslate(),
d_utils.PointcloudJitter()
])
dset = ModelNet40Cls(16, "./", train=True, transforms=transforms)
print(dset[0][0])
print(dset[0][1])
print(len(dset))
dloader = torch.utils.data.DataLoader(dset, batch_size=32, shuffle=True)
+2
View File
@@ -0,0 +1,2 @@
from .ModelNet40Loader import ModelNet40Cls
from .Indoor3DSemSegLoader import Indoor3DSemSeg
+96
View File
@@ -0,0 +1,96 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import os, sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, "..", "utils"))
import pytorch_utils as pt_utils
from TransformNets import TransformNet, TranslationNet
def model_fn_decorator(criterion):
transform_reg = 1e-3
def ortho_loss(matrix):
return torch.dist(
matrix.bmm(matrix.transpose(1, 2)),
Variable(
torch.eye(matrix.size(1), matrix.size(2)).type(
torch.cuda.FloatTensor)))
def wrapped(model, inputs, labels):
labels = labels.squeeze()
preds, end_points = model(inputs)
transform_loss = 0.0
for _, T in end_points.items():
transform_loss += ortho_loss(T)
preds_loss = criterion(preds, labels)
loss = preds_loss + transform_reg * transform_loss
_, classes = torch.max(preds, 1)
acc = (classes == labels).sum()
return preds, loss, acc.data[0]
return wrapped
class PointnetCls(nn.Module):
def __init__(self):
super().__init__()
self.translation_net = TranslationNet()
self.t_net = TransformNet(1, 3, 3, scale=False)
self.f_net = TransformNet(64, 1, 64, scale=False)
self.input_mlp = nn.Sequential(
pt_utils.Conv2d(1, 64, [1, 3], bn=True),
pt_utils.Conv2d(64, 64, bn=True))
self.second_mlp = pt_utils.SharedMLP([64, 64, 128, 1024], bn=True)
self.final_mlp = nn.Sequential(
pt_utils.FC(1024, 512, bn=True),
pt_utils.FC(512, 256, bn=True),
nn.Dropout(0.3), pt_utils.FC(256, 40, activation=None))
def forward(self, points: torch.Tensor):
batch_size, n_points, _ = points.size()
end_points = {}
points = points + self.translation_net(points).unsqueeze(1)
points, transform = self.apply_transform(
points, *self.t_net(points.unsqueeze(1)))
points = self.input_mlp(points.unsqueeze(1))
points, transform = self.apply_transform(points.squeeze().transpose(
1, 2), *self.f_net(points))
end_points['trans2'] = transform
points = F.max_pool2d(
self.second_mlp(points.transpose(1, 2).unsqueeze(-1)),
kernel_size=[n_points, 1])
return self.final_mlp(points.view(-1, 1024)), end_points
def apply_transform(self, points, rotation, scale=None):
points = points @ rotation
if scale is not None:
points = points * scale.contiguous().view(-1, 1, 1).repeat(
1, points.size(1), points.size(2))
return points, rotation
if __name__ == "__main__":
from torch.autograd import Variable
model = PointnetCls()
data = Variable(torch.randn(2, 10, 3))
print(model(data))
+181
View File
@@ -0,0 +1,181 @@
import os, sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, "../utils"))
import torch
import torch.nn as nn
from torch.autograd import Variable
import pytorch_utils as pt_utils
from pointnet2_modules import PointnetSAModule, PointnetFPModule, PointnetSAModuleMSG
from pointnet2_utils import RandomDropout
from collections import namedtuple
def model_fn_decorator(criterion):
ModelReturn = namedtuple("ModelReturn", ['preds', 'loss', 'acc'])
def model_fn(model, data, epoch=0, eval=False):
inputs, labels = data
inputs = Variable(inputs.cuda(async=True), volatile=eval)
labels = Variable(labels.cuda(async=True), volatile=eval)
xyz = inputs[..., :3]
if inputs.size(2) > 3:
points = inputs[..., 3:]
else:
points = None
preds = model(xyz, points)
loss = criterion(preds.view(labels.numel(), -1), labels.view(-1))
_, classes = torch.max(preds.data, 2)
acc = (classes == labels.data).sum() / labels.numel()
return ModelReturn(preds, loss, {"acc": acc})
return model_fn
class Pointnet2SSG(nn.Module):
def __init__(self, num_classes, input_channels=9):
super().__init__()
self.initial_dropout = RandomDropout(0.4)
self.SA_module0 = PointnetSAModule(
npoint=1024,
radius=0.1,
nsample=32,
mlp=[input_channels, 32, 32, 64])
self.SA_module1 = PointnetSAModule(
npoint=256, radius=0.2, nsample=32, mlp=[64 + 3, 64, 64, 128])
self.SA_module2 = PointnetSAModule(
npoint=64, radius=0.4, nsample=32, mlp=[128 + 3, 128, 128, 256])
self.SA_module3 = PointnetSAModule(
npoint=16, radius=0.8, nsample=32, mlp=[256 + 3, 256, 256, 512])
self.FP_module0 = PointnetFPModule(mlp=[512 + 256, 256, 256])
self.FP_module1 = PointnetFPModule(mlp=[256 + 128, 256, 256])
self.FP_module2 = PointnetFPModule(mlp=[256 + 64, 256, 128])
self.FP_module3 = PointnetFPModule(mlp=[128 + 6, 128, 128, 128])
self.FC_layer = nn.Sequential(
pt_utils.Conv1d(128, 128, bn=True), nn.Dropout(),
pt_utils.Conv1d(128, num_classes, activation=None))
def forward(self, xyz, points=None):
if points is not None:
tmp = self.initial_dropout(torch.cat([points, xyz], dim=-1))
l0_points, l0_xyz = tmp.split(points.size(-1), dim=-1)
else:
l0_xyz = self.initial_dropout(xyz)
l0_points = None
l1_xyz, l1_points = self.SA_module0(l0_xyz, l0_points)
l2_xyz, l2_points = self.SA_module1(l1_xyz, l1_points)
l3_xyz, l3_points = self.SA_module2(l2_xyz, l2_points)
l4_xyz, l4_points = self.SA_module3(l3_xyz, l3_points)
l3_points = self.FP_module0(l3_xyz, l4_xyz, l3_points, l4_points)
l2_points = self.FP_module1(l2_xyz, l3_xyz, l2_points, l3_points)
l1_points = self.FP_module2(l1_xyz, l2_xyz, l1_points, l2_points)
l0_points = self.FP_module3(l0_xyz, l1_xyz, l0_points,
l1_points).transpose(1, 2)
return self.FC_layer(l0_points).transpose(1, 2).contiguous()
class Pointnet2MSG(nn.Module):
def __init__(self, num_classes, input_channels=9):
super().__init__()
self.initial_dropout = RandomDropout(0.95, inplace=True)
self.initial_dropout = None
c_in = input_channels
self.SA_module0 = PointnetSAModuleMSG(
npoint=1024,
radii=[0.05, 0.1],
nsamples=[16, 32],
mlps=[[c_in, 16, 16, 32], [c_in, 32, 32, 64]])
c_out_0 = 32 + 64
c_in = c_out_0 + 3
self.SA_module1 = PointnetSAModuleMSG(
npoint=256,
radii=[0.1, 0.2],
nsamples=[16, 32],
mlps=[[c_in, 64, 64, 128], [c_in, 64, 96, 128]])
c_out_1 = 128 + 128
c_in = c_out_1 + 3
self.SA_module2 = PointnetSAModuleMSG(
npoint=64,
radii=[0.2, 0.4],
nsamples=[16, 32],
mlps=[[c_in, 128, 196, 256], [c_in, 128, 196, 256]])
c_out_2 = 256 + 256
c_in = c_out_2 + 3
self.SA_module3 = PointnetSAModuleMSG(
npoint=16,
radii=[0.4, 0.8],
nsamples=[16, 32],
mlps=[[c_in, 256, 256, 512], [c_in, 256, 384, 512]])
c_out_3 = 512 + 512
self.FP_module3 = PointnetFPModule(mlp=[c_out_3 + c_out_2, 512, 512])
self.FP_module2 = PointnetFPModule(mlp=[512 + c_out_1, 512, 512])
self.FP_module1 = PointnetFPModule(mlp=[512 + c_out_0, 256, 256])
self.FP_module0 = PointnetFPModule(
mlp=[256 + input_channels - 3, 128, 128])
self.FC_layer = nn.Sequential(
pt_utils.Conv1d(128, 128, bn=True), nn.Dropout(),
pt_utils.Conv1d(128, num_classes, activation=None))
def forward(self, xyz, points=None):
if points is not None and self.initial_dropout is not None:
tmp = self.initial_dropout(torch.cat([points, xyz], dim=-1))
points, xyz = tmp.split(points.size(-1), dim=-1)
elif self.initial_dropout is not None:
xyz = self.initial_dropout(xyz)
l0_xyz, l0_points = xyz, points
l1_xyz, l1_points = self.SA_module0(l0_xyz, l0_points)
l2_xyz, l2_points = self.SA_module1(l1_xyz, l1_points)
l3_xyz, l3_points = self.SA_module2(l2_xyz, l2_points)
l4_xyz, l4_points = self.SA_module3(l3_xyz, l3_points)
l3_points = self.FP_module3(l3_xyz, l4_xyz, l3_points, l4_points)
l2_points = self.FP_module2(l2_xyz, l3_xyz, l2_points, l3_points)
l1_points = self.FP_module1(l1_xyz, l2_xyz, l1_points, l2_points)
l0_points = self.FP_module0(l0_xyz, l1_xyz, l0_points,
l1_points).transpose(1, 2)
return self.FC_layer(l0_points).transpose(1, 2).contiguous()
if __name__ == "__main__":
from torch.autograd import Variable
import numpy as np
import torch.optim as optim
B = 2
N = 32
inputs = torch.randn(B, N, 9).cuda()
labels = torch.from_numpy(np.random.randint(0, 3,
size=B * N)).view(B, N).cuda()
model = Pointnet2MSG(3)
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
model_fn = model_fn_decorator(nn.CrossEntropyLoss())
for _ in range(20):
optimizer.zero_grad()
_, loss, _ = model_fn(model, (inputs, labels))
loss.backward()
print(loss.data[0])
optimizer.step()
+75
View File
@@ -0,0 +1,75 @@
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import os, sys
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(BASE_DIR)
import pytorch_utils as pt_utils
class TransformNet(nn.Module):
def __init__(self, in_size, channels, K, scale=False):
super().__init__()
self.K, self.scale = K, scale
self.convs = nn.Sequential()
self.convs.add_module('conv0',
pt_utils.Conv2d(
in_size, 64, kernel_size=[1, channels], bn=True))
self.convs.add_module('rest',
pt_utils.SharedMLP([64, 128, 1024], bn=True))
self.fc = nn.Sequential(
pt_utils.FC(1024, 512, bn=True), pt_utils.FC(512, 256, bn=True))
outsize = K * K
if scale:
outsize += 1
self.final_W = nn.Parameter(torch.FloatTensor(256, outsize))
self.final_b = nn.Parameter(torch.FloatTensor(outsize))
self.init_weights()
def forward(self, X):
X = self.convs(X)
X = F.adaptive_max_pool2d(X, [1, 1])
X = self.fc(X.view(-1, 1024))
X = X @ self.final_W + self.final_b
rotation = X[:, 0:self.K * self.K].contiguous().view(
-1, self.K, self.K)
if not self.scale:
return rotation, None
scale = X[:, -1].contiguous()
return rotation, scale
def init_weights(self):
torch.nn.init.constant(self.final_W, 0)
self.final_b.data[:self.K * self.K] = (torch.eye(
self.K, self.K) + 1e-1 * torch.randn(self.K, self.K)).view(-1)
if self.scale:
self.final_b.data[-1] = 1.0
class TranslationNet(nn.Module):
def forward(self, X):
return -torch.mean(X, dim=1)
if __name__ == "__main__":
from torch.autograd import Variable
net = TransformNet(5, 1, 3, True)
net.init_weights()
data = Variable(torch.FloatTensor(1, 5, 10, 1))
print(net(data))
net = TranslationNet(5, 1, 3)
net.init_weights()
print(net(data))
+1
View File
@@ -0,0 +1 @@
from .Pointnet2SemSeg import Pointnet2MSG, Pointnet2SSG
+143
View File
@@ -0,0 +1,143 @@
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_sched
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision import transforms
import os
import tensorboard_logger as tb_log
from models import PointnetCls as Pointnet
from models.PointnetCls import model_fn_decorator
from data import ModelNet40Cls
import utils.pytorch_utils as pt_utils
import utils.data_utils as d_utils
import argparse
def parse_args():
parser = argparse.ArgumentParser(description="Arg parser")
parser.add_argument(
"-batch_size", type=int, default=128, help="Batch size [default: 128]")
parser.add_argument(
"-num_points",
type=int,
default=1024,
help="Number of points to train with [default: 1024]")
parser.add_argument(
"-weight_decay", type=float, default=1e-5, help="L2 regularization coeff")
parser.add_argument(
"-lr",
type=float,
default=1e-2,
help="Initial learning rate [default: 1e-2]")
parser.add_argument(
"-lr_decay",
type=float,
default=0.7,
help="Learning rate decay gamma [default: 0.7]")
parser.add_argument(
"-decay_step",
type=int,
default=20,
help="Learning rate decay step [default: 20]")
parser.add_argument(
"-bn_momentum",
type=float,
default=0.5,
help="Initial batch norm momentum [default: 0.5]")
parser.add_argument(
"-bnm_decay",
type=float,
default=0.5,
help="Batch norm momentum decay gamma [default: 0.5]")
parser.add_argument(
"-checkpoint", type=str, default=None, help="Checkpoint to start from")
parser.add_argument(
"-epochs", type=int, default=200, help="Number of epochs to train for")
parser.add_argument(
"-run_name",
type=str,
default="cls_run_1",
help="Name for run in tensorboard_logger")
return parser.parse_args()
lr_clip = 1e-5
bnm_clip = 1e-2
if __name__ == "__main__":
args = parse_args()
BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
transforms = transforms.Compose([
d_utils.PointcloudToTensor(),
d_utils.PointcloudRotate(x_axis=True),
d_utils.PointcloudScale(),
d_utils.PointcloudTranslate(),
d_utils.PointcloudJitter()
])
test_set = ModelNet40Cls(
args.num_points, BASE_DIR, transforms=transforms, train=False)
test_loader = DataLoader(
test_set,
batch_size=args.batch_size,
shuffle=True,
num_workers=2,
pin_memory=True)
train_set = ModelNet40Cls(args.num_points, BASE_DIR, transforms=transforms)
train_loader = DataLoader(
train_set,
batch_size=args.batch_size,
shuffle=True,
num_workers=2,
pin_memory=True)
tb_log.configure('runs/{}'.format(args.run_name))
model = Pointnet()
model.cuda()
optimizer = optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), lr_clip / args.lr)
bn_lbmd = lambda e: max(args.bn_momentum * args.bnm_decay**(e // args.decay_step), bnm_clip)
if args.checkpoint is not None:
start_epoch, best_prec = pt_utils.load_checkpoint(
model, optimizer, filename=args.checkpoint.split(".")[0])
lr_scheduler = lr_sched.LambdaLR(
optimizer, lr_lambda=lr_lbmd, last_epoch=start_epoch)
bnm_scheduler = pt_utils.BNMomentumScheduler(
model, bn_lambda=bn_lbmd, last_epoch=start_epoch)
else:
lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lambda=lr_lbmd)
bnm_scheduler = pt_utils.BNMomentumScheduler(model, bn_lambda=bn_lbmd)
best_prec = 0.0
start_epoch = 1
model_fn = model_fn_decorator(nn.CrossEntropyLoss())
trainer = pt_utils.Trainer(
model,
model_fn,
optimizer,
checkpoint_name="cls_checkpoint",
best_name="cls_best",
lr_scheduler=lr_scheduler,
bnm_scheduler=bnm_scheduler)
trainer.train(
start_epoch,
args.epochs,
train_loader,
test_loader,
best_prec=best_prec)
if start_epoch == args.epochs:
_ = trainer.eval_epoch(start_epoch, test_loader)
+137
View File
@@ -0,0 +1,137 @@
import torch
import torch.optim as optim
import torch.optim.lr_scheduler as lr_sched
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable
import numpy as np
import tensorboard_logger as tb_log
import os
from models import Pointnet2MSG as Pointnet
from models.Pointnet2SemSeg import model_fn_decorator
from data import Indoor3DSemSeg
import utils.pytorch_utils as pt_utils
import argparse
parser = argparse.ArgumentParser(description="Arg parser")
parser.add_argument(
"-batch_size", type=int, default=32, help="Batch size [default: 32]")
parser.add_argument(
"-num_points",
type=int,
default=2048,
help="Number of points to train with [default: 2048]")
parser.add_argument(
"-weight_decay",
type=float,
default=0,
help="L2 regularization coeff [default: 0.0]")
parser.add_argument(
"-lr",
type=float,
default=1e-2,
help="Initial learning rate [default: 1e-2]")
parser.add_argument(
"-lr_decay",
type=float,
default=0.5,
help="Learning rate decay gamma [default: 0.5]")
parser.add_argument(
"-decay_step",
type=int,
default=20,
help="Learning rate decay step [default: 20]")
parser.add_argument(
"-bn_momentum",
type=float,
default=0.9,
help="Initial batch norm momentum [default: 0.9]")
parser.add_argument(
"-bn_decay",
type=float,
default=0.5,
help="Batch norm momentum decay gamma [default: 0.5]")
parser.add_argument(
"-checkpoint", type=str, default=None, help="Checkpoint to start from")
parser.add_argument(
"-epochs", type=int, default=200, help="Number of epochs to train for")
parser.add_argument(
"-run_name",
type=str,
default="sem_seg_run_1",
help="Name for run in tensorboard_logger")
BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
lr_clip = 1e-5
bnm_clip = 1e-2
if __name__ == "__main__":
args = parser.parse_args()
tb_log.configure('runs/{}'.format(args.run_name))
test_set = Indoor3DSemSeg(
args.num_points, BASE_DIR, train=False, data_precent=0.01)
test_loader = DataLoader(
test_set,
batch_size=args.batch_size,
shuffle=True,
pin_memory=True,
num_workers=2)
train_set = Indoor3DSemSeg(args.num_points, BASE_DIR, data_precent=1.0)
train_loader = DataLoader(
train_set,
batch_size=args.batch_size,
pin_memory=True,
num_workers=2,
shuffle=True)
model = Pointnet(num_classes=13)
model.cuda()
optimizer = optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), lr_clip / args.lr)
bnm_lmbd = lambda e: max(args.bn_momentum * args.bn_decay**(e // args.decay_step), bnm_clip)
if args.checkpoint is None:
lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd)
bnm_scheduler = pt_utils.BNMomentumScheduler(model, bnm_lmbd)
start_epoch = 1
best_prec = 0
best_loss = 1e10
else:
start_epoch, best_loss = pt_utils.load_checkpoint(
model, optimizer, filename=args.checkpoint.split(".")[0])
lr_scheduler = lr_sched.LambdaLR(
optimizer, lr_lbmd, last_epoch=start_epoch)
bnm_scheduler = pt_utils.BNMomentumScheduler(
model, bnm_lmbd, last_epoch=start_epoch)
model_fn = model_fn_decorator(nn.CrossEntropyLoss())
trainer = pt_utils.Trainer(
model,
model_fn,
optimizer,
checkpoint_name="sem_seg_checkpoint",
best_name="sem_seg_best",
lr_scheduler=lr_scheduler,
bnm_scheduler=bnm_scheduler,
eval_frequency=10)
trainer.train(
start_epoch,
args.epochs,
train_loader,
test_loader,
best_loss=best_loss)
if start_epoch == args.epochs:
test_loader.dataset.data_precent = 1.0
_ = trainer.eval_epoch(start_epoch, test_loader)
+2
View File
@@ -0,0 +1,2 @@
build
_ext
+20
View File
@@ -0,0 +1,20 @@
project(PointNet2)
cmake_minimum_required(VERSION 3.5)
find_package(CUDA)
include_directories("${CMAKE_SOURCE_DIR}/cinclude")
cuda_include_directories("${CMAKE_SOURCE_DIR}/cinclude")
file(GLOB cuda_kernels_src "csrc/*.cu")
cuda_compile(cuda_kernels SHARED ${cuda_kernels_src} OPTIONS -O3)
file(GLOB wrapper_headers "cinclude/*wrapper.h")
add_custom_command(OUTPUT "${CMAKE_SOURCE_DIR}/_ext/__ext.so"
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
COMMAND python "${CMAKE_SOURCE_DIR}/build_ffi.py" ${cuda_kernels}
DEPENDS ${cuda_kernels}
DEPENDS ${wrapper_headers}
VERBATIM)
add_custom_target(ext ALL
DEPENDS "${CMAKE_SOURCE_DIR}/_ext/__ext.so")
View File
+23
View File
@@ -0,0 +1,23 @@
import glob
import torch
from os import path
from torch.utils.ffi import create_extension
import sys
base_dir = path.dirname(path.abspath(__file__))
extra_objects = sys.argv[1:]
extra_objects += [a for a in glob.glob('/usr/local/cuda/lib64/*.a')]
ffi = create_extension(
'_ext',
headers=[a for a in glob.glob("cinclude/*_wrapper.h")],
sources=[a for a in glob.glob("csrc/*.c")],
define_macros=[('WITH_CUDA', None)],
relative_to=__file__,
with_cuda=True,
extra_objects=extra_objects,
include_dirs=[path.join(base_dir, 'cinclude')])
if __name__ == "__main__":
assert torch.cuda.is_available(), "Needs CUDA!"
ffi.build()
+16
View File
@@ -0,0 +1,16 @@
#ifndef _BALL_QUERY_GPU
#define _BALL_QUERY_GPU
#ifdef __cplusplus
extern "C" {
#endif
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
int nsample, const float *xyz,
const float *new_xyz, int *idx,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif
+4
View File
@@ -0,0 +1,4 @@
int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor,
THCudaIntTensor *idx_tensor);
+24
View File
@@ -0,0 +1,24 @@
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H
#ifdef __cplusplus
extern "C" {
#endif
inline int opt_n_threads(int work_size) {
unsigned int n_threads = work_size;
n_threads--;
n_threads |= n_threads >> 1;
n_threads |= n_threads >> 2;
n_threads |= n_threads >> 4;
n_threads |= n_threads >> 8;
n_threads |= n_threads >> 16;
n_threads++;
return max(min(n_threads / 2, 512), 2);
}
#ifdef __cplusplus
}
#endif
#endif
+19
View File
@@ -0,0 +1,19 @@
#ifndef _BALL_QUERY_GPU
#define _BALL_QUERY_GPU
#ifdef __cplusplus
extern "C" {
#endif
void group_points_kernel_wrapper(int b, int n, int c, int npoints, int nsample,
const float *points, const int *idx,
float *out, cudaStream_t stream);
void group_points_grad_kernel_wrapper(int b, int n, int c, int npoints,
int nsample, const float *grad_out,
const int *idx, float *grad_points,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif
+8
View File
@@ -0,0 +1,8 @@
int group_points_wrapper(int b, int n, int c, int npoints, int nsample,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor, THCudaTensor *out);
int group_points_grad_wrapper(int b, int n, int c, int npoints, int nsample,
THCudaTensor *grad_out_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *grad_points_tensor);
+27
View File
@@ -0,0 +1,27 @@
#ifndef _INTERPOLATE_GPU_H
#define _INTERPOLATE_GPU_H
#ifdef __cplusplus
extern "C" {
#endif
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx,
cudaStream_t stream);
void three_interpolate_kernel_wrapper(int b, int m, int c, int n,
const float *points, const int *idx,
const float *weight, float *out,
cudaStream_t stream);
void three_interpolate_grad_kernel_wrapper(int b, int n, int c, int m,
const float *grad_out,
const int *idx, const float *weight,
float *grad_points,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif
+16
View File
@@ -0,0 +1,16 @@
void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor,
THCudaTensor *known_tensor, THCudaTensor *dist2_tensor,
THCudaIntTensor *idx_tensor);
void three_interpolate_wrapper(int b, int m, int c, int n,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *weight_tensor,
THCudaTensor *out_tensor);
void three_interpolate_grad_wrapper(int b, int n, int c, int m,
THCudaTensor *grad_out_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *weight_tensor,
THCudaTensor *grad_points_tensor);
+29
View File
@@ -0,0 +1,29 @@
#ifndef _ROI_MASK_POINTS_GPU_H
#define _ROI_MASK_POINTS_GPU_H
#ifdef __cplusplus
extern "C" {
#endif
void roi_mask_kernel_wrapper(int n_roi, int b, int n, const float *rois,
const long *batch_indices, const float *data_xyz,
unsigned char *mask, cudaStream_t stream);
void roi_avg_pool_kernel_forward_wrapper(int n_roi, int b, int n, int d,
const unsigned char *mask,
const long *batch_indices,
const float *points,
float *descriptors,
cudaStream_t stream);
void roi_avg_pool_kernel_backward_wrapper(int n_roi, int b, int n, int d,
const unsigned char *mask,
const long *batch_indices,
const float *grad_descriptors,
float *grad_points,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif
+15
View File
@@ -0,0 +1,15 @@
int roi_mask_wrapper(int n_roi, int b, int n, THCudaTensor *rois_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *data_xyz_tensor,
THCudaByteTensor *mask_tensor);
int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d,
THCudaByteTensor *mask_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *points_tensor,
THCudaTensor *descriptors_tensor);
int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d,
THCudaByteTensor *mask_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *grad_descriptors_tensor,
THCudaTensor *grad_points_tensor);
+19
View File
@@ -0,0 +1,19 @@
#ifndef _SAMPLING_GPU_H
#define _SAMPLING_GPU_H
#ifdef __cplusplus
extern "C" {
#endif
void gather_points_kernel_wrapper(int b, int n, int c, int npoints,
const float *points, const int *idx,
float *out, cudaStream_t stream);
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
const float *dataset, float *temp,
int *idxs, cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif
+10
View File
@@ -0,0 +1,10 @@
int gather_points_wrapper(int b, int n, int c, int npoints,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *out_tensor);
int furthest_point_sampling_wrapper(int b, int n, int m,
THCudaTensor *points_tensor,
THCudaTensor *temp_tensor,
THCudaIntTensor *idx_tensor);
+20
View File
@@ -0,0 +1,20 @@
#include <THC/THC.h>
#include "ball_query_gpu.h"
extern THCState *state;
int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor,
THCudaIntTensor *idx_tensor) {
const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor);
const float *xyz = THCudaTensor_data(state, xyz_tensor);
int *idx = THCudaIntTensor_data(state, idx_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
query_ball_point_kernel_wrapper(b, n, m, radius, nsample, new_xyz, xyz,
idx, stream);
return 1;
}
+63
View File
@@ -0,0 +1,63 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "ball_query_gpu.h"
#include "cuda_utils.h"
// input: new_xyz(b, m, 3) xyz(b, n, 3)
// output: idx(b, m, nsample)
__global__ void query_ball_point_kernel(int b, int n, int m, float radius,
int nsample,
const float *__restrict__ new_xyz,
const float *__restrict__ xyz,
int * __restrict__ idx) {
int batch_index = blockIdx.x;
xyz += batch_index * n * 3;
new_xyz += batch_index * m * 3;
idx += m * nsample * batch_index;
int index = threadIdx.x;
int stride = blockDim.x;
float radius2 = radius * radius;
for (int j = index; j < m; j += stride) {
float new_x = new_xyz[j * 3 + 0];
float new_y = new_xyz[j * 3 + 1];
float new_z = new_xyz[j * 3 + 2];
for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
float x = xyz[k * 3 + 0];
float y = xyz[k * 3 + 1];
float z = xyz[k * 3 + 2];
float d2 = (new_x - x) * (new_x - x) +
(new_y - y) * (new_y - y) +
(new_z - z) * (new_z - z);
if (d2 < radius2) {
if (cnt == 0) {
for (int l = 0; l < nsample; ++l) {
idx[j * nsample + l] = k;
}
}
idx[j * nsample + cnt] = k;
++cnt;
}
}
}
}
void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
int nsample, const float *new_xyz,
const float *xyz, int *idx,
cudaStream_t stream) {
cudaError_t err;
query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
b, n, m, radius, nsample, new_xyz, xyz, idx);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
+37
View File
@@ -0,0 +1,37 @@
#include <THC/THC.h>
#include "group_points_gpu.h"
extern THCState *state;
int group_points_wrapper(int b, int n, int c, int npoints, int nsample,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *out_tensor) {
const float *points = THCudaTensor_data(state, points_tensor);
const int *idx = THCudaIntTensor_data(state, idx_tensor);
float *out = THCudaTensor_data(state, out_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
group_points_kernel_wrapper(b, n, c, npoints, nsample, points, idx, out,
stream);
return 1;
}
int group_points_grad_wrapper(int b, int n, int c, int npoints, int nsample,
THCudaTensor *grad_out_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *grad_points_tensor) {
float *grad_points = THCudaTensor_data(state, grad_points_tensor);
const int *idx = THCudaIntTensor_data(state, idx_tensor);
const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
group_points_grad_kernel_wrapper(b, n, c, npoints, nsample, grad_out,
idx, grad_points, stream);
return 1;
}
+86
View File
@@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include "group_points_gpu.h"
#include "cuda_utils.h"
// input: points(b, n, c) idx(b, npoints, nsample)
// output: out(b, npoints, nsample, c)
__global__ void group_points_kernel(int b, int n, int c, int npoints,
int nsample,
const float *__restrict__ points,
const int *__restrict__ idx,
float *__restrict__ out) {
int batch_index = blockIdx.x;
points += batch_index * n * c;
idx += batch_index * npoints * nsample;
out += batch_index * npoints * nsample * c;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < npoints; j += stride) {
for (int k = 0; k < nsample; ++k) {
int ii = idx[j * nsample + k];
memcpy(out + j * nsample * c + k * c, points + ii * c,
sizeof(float) * c);
}
}
}
void group_points_kernel_wrapper(int b, int n, int c, int npoints, int nsample,
const float *points, const int *idx,
float *out, cudaStream_t stream) {
cudaError_t err;
group_points_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
b, n, c, npoints, nsample, points, idx, out);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
// input: grad_out(b, npoints, nsample, c), idx(b, npoints, nsample)
// output: grad_points(b, n, c)
__global__ void group_points_grad_kernel(int b, int n, int c, int npoints,
int nsample,
const float *__restrict__ grad_out,
const int *__restrict__ idx,
float *__restrict__ grad_points) {
int batch_index = blockIdx.x;
grad_points += batch_index * n * c;
idx += batch_index * npoints * nsample;
grad_out += batch_index * npoints * nsample * c;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < npoints; j += stride) {
for (int k = 0; k < nsample; ++k) {
int ii = idx[j * nsample + k];
for (int l = 0; l < c; ++l) {
atomicAdd(
grad_points + ii * c + l,
grad_out[j * nsample * c + k * c + l]);
}
}
}
}
void group_points_grad_kernel_wrapper(int b, int n, int c, int npoints,
int nsample, const float *grad_out,
const int *idx, float *grad_points,
cudaStream_t stream) {
cudaError_t err;
group_points_grad_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
b, n, c, npoints, nsample, grad_out, idx, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
+52
View File
@@ -0,0 +1,52 @@
#include <THC/THC.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "interpolate_gpu.h"
extern THCState *state;
void three_nn_wrapper(int b, int n, int m, THCudaTensor *unknown_tensor,
THCudaTensor *known_tensor, THCudaTensor *dist2_tensor,
THCudaIntTensor *idx_tensor) {
const float *unknown = THCudaTensor_data(state, unknown_tensor);
const float *known = THCudaTensor_data(state, known_tensor);
float *dist2 = THCudaTensor_data(state, dist2_tensor);
int *idx = THCudaIntTensor_data(state, idx_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
three_nn_kernel_wrapper(b, n, m, unknown, known, dist2, idx, stream);
}
void three_interpolate_wrapper(int b, int m, int c, int n,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *weight_tensor,
THCudaTensor *out_tensor) {
const float *points = THCudaTensor_data(state, points_tensor);
const float *weight = THCudaTensor_data(state, weight_tensor);
float *out = THCudaTensor_data(state, out_tensor);
const int *idx = THCudaIntTensor_data(state, idx_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
three_interpolate_kernel_wrapper(b, m, c, n, points, idx, weight, out,
stream);
}
void three_interpolate_grad_wrapper(int b, int n, int c, int m,
THCudaTensor *grad_out_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *weight_tensor,
THCudaTensor *grad_points_tensor) {
const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
const float *weight = THCudaTensor_data(state, weight_tensor);
float *grad_points = THCudaTensor_data(state, grad_points_tensor);
const int *idx = THCudaIntTensor_data(state, idx_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
three_interpolate_grad_kernel_wrapper(b, n, c, m, grad_out, idx, weight,
grad_points, stream);
}
+180
View File
@@ -0,0 +1,180 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "interpolate_gpu.h"
#include "cuda_utils.h"
// input: unknown(b, n, 3) known(b, m, 3)
// output: dist2(b, n, 3), idx(b, n, 3)
__global__ void three_nn_kernel(int b, int n, int m,
const float *__restrict__ unknown,
const float *__restrict__ known,
float *__restrict__ dist2,
int *__restrict__ idx) {
int batch_index = blockIdx.x;
unknown += batch_index * n * 3;
known += batch_index * m * 3;
dist2 += batch_index * n * 3;
idx += batch_index * n * 3;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < n; j += stride) {
float ux = unknown[j * 3 + 0];
float uy = unknown[j * 3 + 1];
float uz = unknown[j * 3 + 2];
double best1 = 1e40, best2 = 1e40, best3 = 1e40;
int besti1 = 0, besti2 = 0, besti3 = 0;
for (int k = 0; k < m; ++k) {
float x = known[k * 3 + 0];
float y = known[k * 3 + 1];
float z = known[k * 3 + 2];
float d =
(ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
if (d < best1) {
best3 = best2;
besti3 = besti2;
best2 = best1;
besti2 = besti1;
best1 = d;
besti1 = k;
} else if (d < best2) {
best3 = best2;
besti3 = besti2;
best2 = d;
besti2 = k;
} else if (d < best3) {
best3 = d;
besti3 = k;
}
}
dist2[j * 3 + 0] = best1;
dist2[j * 3 + 1] = best2;
dist2[j * 3 + 2] = best3;
idx[j * 3 + 0] = besti1;
idx[j * 3 + 1] = besti2;
idx[j * 3 + 2] = besti3;
}
}
void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx,
cudaStream_t stream) {
cudaError_t err;
three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
dist2, idx);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel "
"failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
// input: points(b, m, c), idx(b, n, 3), weight(b, n, 3)
// output: out(b, n, c)
__global__ void three_interpolate_kernel(int b, int m, int c, int n,
const float *__restrict__ points,
const int *__restrict__ idx,
const float *__restrict__ weight,
float *__restrict__ out) {
int batch_index = blockIdx.x;
points += batch_index * m * c;
idx += batch_index * n * 3;
weight += batch_index * n * 3;
out += batch_index * n * c;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < n; j += stride) {
float w1 = weight[j * 3 + 0];
float w2 = weight[j * 3 + 1];
float w3 = weight[j * 3 + 2];
int i1 = idx[j * 3 + 0];
int i2 = idx[j * 3 + 1];
int i3 = idx[j * 3 + 2];
for (int l = 0; l < c; ++l) {
out[j * c + l] = points[i1 * c + l] * w1 + points[i2 * c + l] * w2 +
points[i3 * c + l] * w3;
}
}
}
void three_interpolate_kernel_wrapper(int b, int m, int c, int n,
const float *points, const int *idx,
const float *weight, float *out,
cudaStream_t stream) {
cudaError_t err;
three_interpolate_kernel<<<b, opt_n_threads(n) / 4, 0, stream>>>(
b, m, c, n, points, idx, weight, out);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel "
"failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
// input: grad_out(b, n, c), idx(b, n, 3), weight(b, n, 3)
// output: grad_points(b, m, c)
__global__ void three_interpolate_grad_kernel(
int b, int n, int c, int m, const float *__restrict__ grad_out,
const int *__restrict__ idx, const float *__restrict__ weight,
float *__restrict__ grad_points) {
int batch_index = blockIdx.x;
grad_out += batch_index * n * c;
idx += batch_index * n * 3;
weight += batch_index * n * 3;
grad_points += batch_index * m * c;
int index = threadIdx.x;
int stride = blockDim.x;
for (int j = index; j < n; j += stride) {
float w1 = weight[j * 3 + 0];
float w2 = weight[j * 3 + 1];
float w3 = weight[j * 3 + 2];
int i1 = idx[j * 3 + 0];
int i2 = idx[j * 3 + 1];
int i3 = idx[j * 3 + 2];
for (int l = 0; l < c; ++l) {
atomicAdd(grad_points + i1 * c + l, grad_out[j * c + l] * w1);
atomicAdd(grad_points + i2 * c + l, grad_out[j * c + l] * w2);
atomicAdd(grad_points + i3 * c + l, grad_out[j * c + l] * w3);
}
}
}
void three_interpolate_grad_kernel_wrapper(int b, int n, int c, int m,
const float *grad_out,
const int *idx, const float *weight,
float *grad_points,
cudaStream_t stream) {
cudaError_t err;
three_interpolate_grad_kernel<<<b, opt_n_threads(n) / 4, 0, stream>>>(
b, n, c, m, grad_out, idx, weight, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel "
"failed : %s\n",
cudaGetErrorString(err));
exit(-1);
}
}
+157
View File
@@ -0,0 +1,157 @@
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "roi_mask_points_gpu.h"
// roi format: [w, d, h, theta, cx, cy, cz]
__device__ bool is_in_roi(const float *__restrict__ xyz,
const float *__restrict__ roi) {
const float w = roi[0], d = roi[1], h = roi[2], theta = roi[3], cx = roi[4],
cy = roi[5], cz = roi[6];
const float x = xyz[0], y = xyz[1], z = xyz[2];
const float sinval = sin(theta);
const float cosval = cos(theta);
const float bx_x = w * cosval;
const float bx_y = d * -sinval;
const float by_x = w * sinval;
const float by_y = d * cosval;
const float dx = fabs(x - cx), dy = fabs(y - cy), dz = fabs(z - cz);
return dx <= fabs(bx_x + by_x) && dy <= fabs(bx_y + by_y) && dz <= h;
}
// Input rois (n_roi, 7), batch_indices (n_roi), data_xyz (b, n, 3)
// Ouput mask (n_roi, n)
__global__ void roi_mask_kernel(int n_roi, int b, int n,
const float *__restrict__ rois,
const long *__restrict__ batch_indices,
const float *__restrict__ data_xyz,
unsigned char *__restrict__ mask) {
const int block_idx = blockIdx.x;
const float *__restrict__ roi = rois + block_idx * 7;
mask += block_idx * n;
const long batch_idx = batch_indices[block_idx];
data_xyz += batch_idx * n * 3;
const int thread_idx = threadIdx.x;
const int thread_stride = blockDim.x;
for (int j = thread_idx; j < n; j += thread_stride) {
const float *__restrict__ xyz = data_xyz + j * 3;
mask[j] = is_in_roi(xyz, roi) ? 1 : 0;
}
}
void roi_mask_kernel_wrapper(int n_roi, int b, int n, const float *rois,
const long *batch_indices, const float *data_xyz,
unsigned char *mask, cudaStream_t stream) {
cudaError_t err;
unsigned int n_threads = opt_n_threads(n);
roi_mask_kernel<<<n_roi, n_threads, 0, stream>>>(
n_roi, b, n, rois, batch_indices, data_xyz, mask);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
// Input mask(n_roi, n) batch_indices (n_roi), points (b, n, d)
// Ouput count (n_roi,) descriptors (n_roi, d)
__global__ void roi_avg_pool_kernel_forward(
int n_roi, int b, int n, int d, const unsigned char *__restrict__ mask,
const long *__restrict__ batch_indices, const float *__restrict__ points,
float *__restrict__ descriptors) {
const int block_idx = blockIdx.x;
mask += block_idx * n;
descriptors += block_idx * d;
const long batch_idx = batch_indices[block_idx];
points += batch_idx * n * d;
const int thread_idx = threadIdx.x;
const int thread_stride = blockDim.x;
for (int j = thread_idx; j < n; j += thread_stride) {
if (mask[j] == 1) {
for (int c = 0; c < d; ++c) {
atomicAdd(descriptors + c, points[j * d + c]);
}
}
}
}
void roi_avg_pool_kernel_forward_wrapper(int n_roi, int b, int n, int d,
const unsigned char *mask,
const long *batch_indices,
const float *points,
float *descriptors,
cudaStream_t stream) {
cudaError_t err;
unsigned int n_threads = opt_n_threads(n);
roi_avg_pool_kernel_forward<<<n_roi, n_threads, 0, stream>>>(
n_roi, b, n, d, mask, batch_indices, points, descriptors);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__global__ void
roi_avg_pool_kernel_backward(int n_roi, int b, int n, int d,
const unsigned char *__restrict__ mask,
const long *__restrict__ batch_indices,
const float *__restrict__ grad_descriptors,
float *__restrict__ grad_points) {
const int block_idx = blockIdx.x;
mask += block_idx * n;
grad_descriptors += block_idx * d;
const long batch_idx = batch_indices[block_idx];
grad_points += batch_idx * n * d;
const int thread_idx = threadIdx.x;
const int thread_stride = blockDim.x;
for (int j = thread_idx; j < n; j += thread_stride) {
if (mask[j] == 1) {
for (int c = 0; c < d; ++c) {
atomicAdd(grad_points + j * d + c, grad_descriptors[c]);
}
}
}
}
void roi_avg_pool_kernel_backward_wrapper(int n_roi, int b, int n, int d,
const unsigned char *mask,
const long *batch_indices,
const float *grad_descriptors,
float *grad_points,
cudaStream_t stream) {
cudaError_t err;
unsigned int n_threads = opt_n_threads(n);
roi_avg_pool_kernel_backward<<<n_roi, n_threads, 0, stream>>>(
n_roi, b, n, d, mask, batch_indices, grad_descriptors, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
+63
View File
@@ -0,0 +1,63 @@
#include <THC/THC.h>
#include "roi_mask_points_gpu.h"
extern THCState *state;
int roi_mask_wrapper(int n_roi, int b, int n, THCudaTensor *rois_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *data_xyz_tensor,
THCudaByteTensor *mask_tensor) {
const float *rois = THCudaTensor_data(state, rois_tensor);
const long *batch_indices =
THCudaLongTensor_data(state, batch_indices_tensor);
const float *data_xyz = THCudaTensor_data(state, data_xyz_tensor);
unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
roi_mask_kernel_wrapper(n_roi, b, n, rois, batch_indices, data_xyz,
mask, stream);
return 1;
}
int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d,
THCudaByteTensor *mask_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *points_tensor,
THCudaTensor *descriptors_tensor) {
const long *batch_indices =
THCudaLongTensor_data(state, batch_indices_tensor);
const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
const float *points = THCudaTensor_data(state, points_tensor);
float *descriptors = THCudaTensor_data(state, descriptors_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
roi_avg_pool_kernel_forward_wrapper(n_roi, b, n, d, mask, batch_indices,
points, descriptors, stream);
return 1;
}
int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d,
THCudaByteTensor *mask_tensor,
THCudaLongTensor *batch_indices_tensor,
THCudaTensor *grad_descriptors_tensor,
THCudaTensor *grad_points_tensor) {
const long *batch_indices =
THCudaLongTensor_data(state, batch_indices_tensor);
const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
const float *grad_descriptors =
THCudaTensor_data(state, grad_descriptors_tensor);
float *grad_points = THCudaTensor_data(state, grad_points_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
roi_avg_pool_kernel_backward_wrapper(n_roi, b, n, d, mask,
batch_indices, grad_descriptors,
grad_points, stream);
return 1;
}
+37
View File
@@ -0,0 +1,37 @@
#include <THC/THC.h>
#include "sampling_gpu.h"
extern THCState *state;
int gather_points_wrapper(int b, int n, int c, int npoints,
THCudaTensor *points_tensor,
THCudaIntTensor *idx_tensor,
THCudaTensor *out_tensor) {
const float *points = THCudaTensor_data(state, points_tensor);
const int *idx = THCudaIntTensor_data(state, idx_tensor);
float *out = THCudaTensor_data(state, out_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
gather_points_kernel_wrapper(b, n, c, npoints, points, idx, out,
stream);
return 1;
}
int furthest_point_sampling_wrapper(int b, int n, int m,
THCudaTensor *points_tensor,
THCudaTensor *temp_tensor,
THCudaIntTensor *idx_tensor) {
const float *points = THCudaTensor_data(state, points_tensor);
float *temp = THCudaTensor_data(state, temp_tensor);
int *idx = THCudaIntTensor_data(state, idx_tensor);
cudaStream_t stream = THCState_getCurrentStream(state);
furthest_point_sampling_kernel_wrapper(b, n, m, points, temp, idx,
stream);
return 1;
}
+216
View File
@@ -0,0 +1,216 @@
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "sampling_gpu.h"
// input: points(b, n, c) idx(b, m)
// output: out(b, m, c)
__global__ void gather_points_kernel(int b, int n, int c, int m,
const float *__restrict__ points,
const int *__restrict__ idx,
float *__restrict__ out) {
for (int i = blockIdx.x; i < b; i += gridDim.x) {
for (int j = blockIdx.y * blockDim.x + threadIdx.x; j < m;
j += blockDim.x * gridDim.y) {
int a = idx[i * m + j];
memcpy(out + (i * m + j) * c, points + (i * n + a) * c,
sizeof(float) * c);
}
}
}
void gather_points_kernel_wrapper(int b, int n, int c, int npoints,
const float *points, const int *idx,
float *out, cudaStream_t stream) {
cudaError_t err;
gather_points_kernel<<<dim3(2, 8, 1), opt_n_threads(npoints) / 4, 0,
stream>>>(b, n, c, npoints, points, idx, out);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
int idx1, int idx2) {
const float v1 = dists[idx1], v2 = dists[idx2];
const int i1 = dists_i[idx1], i2 = dists_i[idx2];
dists[idx1] = max(v1, v2);
dists_i[idx1] = v2 > v1 ? i2 : i1;
}
// Input dataset: (b, n, 3), tmp: (b, n)
// Ouput idxs (b, m)
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(
int b, int n, int m, const float *__restrict__ dataset,
float *__restrict__ temp, int *__restrict__ idxs) {
if (m <= 0)
return;
__shared__ float dists[block_size];
__shared__ int dists_i[block_size];
int batch_index = blockIdx.x;
dataset += batch_index * n * 3;
temp += batch_index * n;
idxs += batch_index * m;
int tid = threadIdx.x;
const int stride = block_size;
int old = 0;
if (threadIdx.x == 0)
idxs[0] = old;
__syncthreads();
for (int j = 1; j < m; j++) {
int besti = 0;
float best = -1;
float x1 = dataset[old * 3 + 0];
float y1 = dataset[old * 3 + 1];
float z1 = dataset[old * 3 + 2];
for (int k = tid; k < n; k += stride) {
float x2, y2, z2;
x2 = dataset[k * 3 + 0];
y2 = dataset[k * 3 + 1];
z2 = dataset[k * 3 + 2];
float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
if (mag <= 1e-3)
continue;
float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) +
(z2 - z1) * (z2 - z1);
float d2 = min(d, temp[k]);
temp[k] = d2;
besti = d2 > best ? k : besti;
best = d2 > best ? d2 : best;
}
dists[tid] = best;
dists_i[tid] = besti;
__syncthreads();
if (block_size >= 512) {
if (tid < 256) {
__update(dists, dists_i, tid, tid + 256);
}
__syncthreads();
}
if (block_size >= 256) {
if (tid < 128) {
__update(dists, dists_i, tid, tid + 128);
}
__syncthreads();
}
if (block_size >= 128) {
if (tid < 64) {
__update(dists, dists_i, tid, tid + 64);
}
__syncthreads();
}
if (block_size >= 64) {
if (tid < 32) {
__update(dists, dists_i, tid, tid + 32);
}
__syncthreads();
}
if (block_size >= 32) {
if (tid < 16) {
__update(dists, dists_i, tid, tid + 16);
}
__syncthreads();
}
if (block_size >= 16) {
if (tid < 8) {
__update(dists, dists_i, tid, tid + 8);
}
__syncthreads();
}
if (block_size >= 8) {
if (tid < 4) {
__update(dists, dists_i, tid, tid + 4);
}
__syncthreads();
}
if (block_size >= 4) {
if (tid < 2) {
__update(dists, dists_i, tid, tid + 2);
}
__syncthreads();
}
if (block_size >= 2) {
if (tid < 1) {
__update(dists, dists_i, tid, tid + 1);
}
__syncthreads();
}
old = dists_i[0];
if (tid == 0)
idxs[j] = old;
}
}
void furthest_point_sampling_kernel_wrapper(int b, int n, int m,
const float *dataset, float *temp,
int *idxs, cudaStream_t stream) {
cudaError_t err;
unsigned int n_threads = opt_n_threads(n);
switch (n_threads) {
case 512:
furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 256:
furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 128:
furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 64:
furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 32:
furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 16:
furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 8:
furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 4:
furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 2:
furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
case 1:
furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
break;
default:
furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(
b, n, m, dataset, temp, idxs);
}
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
+70
View File
@@ -0,0 +1,70 @@
import torch
import numpy as np
class PointcloudScale(object):
def __init__(self, mean=2.0, std=1.0, clip=1.8):
self.mean, self.std, self.clip = mean, std, clip
def __call__(self, points):
scaler = points.new(1).normal_(
mean=self.mean, std=self.std).clamp_(
max(self.mean - self.clip, 0.01), self.mean + self.clip)
return scaler * points
class PointcloudRotate(object):
def __init__(self, x_axis=False, z_axis=True):
assert x_axis or z_axis
self.x, self.y = x_axis, z_axis
def _get_angles(self):
rotation_angle = np.random.uniform() * 2 * np.pi
cosval = np.cos(rotation_angle)
sinval = np.sin(rotation_angle)
return cosval, sinval
def __call__(self, points):
if self.z:
sinval, cosval = self._get_angles()
Rz = points.new([[cosval, sinval, 0], [-sinval, cosval, 0],
[0, 0, 1]])
else:
Rz = torch.eye(3)
if self.x:
sinval, cosval = self._get_angles()
Rx = points.new([[1, 0, 0], [0, cosval, sinval],
[0, -sinval, cosval]])
else:
Rx = torch.eye(3)
rot_mat = Rx @ Rz
return points @ rot_mat
class PointcloudJitter(object):
def __init__(self, std=0.01, clip=0.03):
self.std, self.clip = std, clip
def __call__(self, points):
jittered_data = points.new(*points.size()).normal_(
mean=0.0, std=self.std).clamp_(-self.clip, self.clip)
return points + jittered_data
class PointcloudTranslate(object):
def __init__(self, std=1.0, clip=3.0):
self.std, self.clip = std, clip
def __call__(self, points):
translation = points.new(3).normal_(
mean=0.0, std=self.std).clamp_(-self.clip, self.clip)
return points + translation
class PointcloudToTensor(object):
def __call__(self, points):
return torch.from_numpy(points).float()
+76
View File
@@ -0,0 +1,76 @@
import torch
from enum import Enum
PDist2Order = Enum('PDist2Order', 'd_first d_second')
def pdist2(X: torch.Tensor,
Z: torch.Tensor = None,
order: PDist2Order = PDist2Order.d_second) -> torch.Tensor:
r""" Calculates the pairwise distance between X and Z
D[b, i, j] = l2 distance X[b, i] and Z[b, j]
Parameters
---------
X : torch.Tensor
X is a (B, N, d) tensor. There are B batches, and N vectors of dimension d
Z: torch.Tensor
Z is a (B, M, d) tensor. If Z is None, then Z = X
Returns
-------
torch.Tensor
Distance matrix is size (B, N, M)
"""
if order == PDist2Order.d_second:
if X.dim() == 2:
X = X.unsqueeze(0)
if Z is None:
Z = X
G = X @ Z.transpose(-2, -1)
S = (X * X).sum(-1, keepdim=True)
R = S.transpose(-2, -1)
else:
if Z.dim() == 2:
Z = Z.unsqueeze(0)
G = X @ Z.transpose(-2, -1)
S = (X * X).sum(-1, keepdim=True)
R = (Z * Z).sum(-1, keepdim=True).transpose(-2, -1)
else:
if X.dim() == 2:
X = X.unsqueeze(0)
if Z is None:
Z = X
G = X.transpose(-2, -1) @ Z
R = (X * X).sum(-2, keepdim=True)
S = R.transpose(-2, -1)
else:
if Z.dim() == 2:
Z = Z.unsqueeze(0)
G = X.transpose(-2, -1) @ Z
S = (X * X).sum(-2, keepdim=True).transpose(-2, -1)
R = (Z * Z).sum(-2, keepdim=True)
return torch.abs(R + S - 2 * G).squeeze(0)
def pdist2_slow(X, Z=None):
if Z is None: Z = X
D = torch.zeros(X.size(0), X.size(2), Z.size(2))
for b in range(D.size(0)):
for i in range(D.size(1)):
for j in range(D.size(2)):
D[b, i, j] = torch.dist(X[b, :, i], Z[b, :, j])
return D
if __name__ == "__main__":
X = torch.randn(2, 3, 5)
Z = torch.randn(2, 3, 3)
print(pdist2(X, order=PDist2Order.d_first))
print(pdist2_slow(X))
print(torch.dist(pdist2(X, order=PDist2Order.d_first), pdist2_slow(X)))
+243
View File
@@ -0,0 +1,243 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import pointnet2_utils
import pytorch_utils as pt_utils
from typing import List
class PointnetSAModuleMSG(nn.Module):
r"""Pointnet set abstrction layer with multiscale grouping
Parameters
----------
npoint : int
Number of points
radii : list of float32
list of radii to group with
nsamples : list of int32
Number of samples in each ball query
mlps : list of list of int32
Spec of the pointnet before the global max_pool for each scale
bn : bool
Use batchnorm
"""
def __init__(self,
*,
npoint: int,
radii: List[float],
nsamples: List[int],
mlps: List[List[int]],
bn: bool = True):
super().__init__()
assert len(radii) == len(nsamples) == len(mlps)
self.npoint = npoint
self.groupers = nn.ModuleList()
self.mlps = nn.ModuleList()
for i in range(len(radii)):
radius = radii[i]
nsample = nsamples[i]
self.groupers.append(
pointnet2_utils.QueryAndGroup(radius, nsample))
mlp_spec = mlps[i]
self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
def forward(self, xyz: torch.Tensor,
points: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
r"""
Parameters
----------
xyz : torch.Tensor
(B, N, 3) tensor of the xyz coordinates of the points
point : torch.Tensor
(B, N, C) tensor of the descriptors of the the points
Returns
-------
new_xyz : torch.Tensor
(B, npoint, 3) tensor of the new points' xyz
new_points : torch.Tensor
(B, npoint, \sum_k(mlps[k][-1])) tensor of the new_points descriptors
"""
new_points_list = []
new_xyz = pointnet2_utils.gather_points(
xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint))
for i in range(len(self.groupers)):
new_points = self.groupers[i](xyz, new_xyz, points)
new_points = self.mlps[i](new_points.permute(
0, 3, 1, 2)) # (B, mlp[-1], npoint, nsample)
new_points = F.max_pool2d(
new_points,
kernel_size=[1, new_points.size(3)]) # (B, mlp[-1], npoint, 1)
new_points = new_points.squeeze(-1) # (B, mlp[-1], npoint)
new_points = new_points.transpose(
1, 2).contiguous() # (B, npoint, mlp[-1])
new_points_list.append(new_points)
return new_xyz, torch.cat(new_points_list, dim=-1)
class PointnetSAModule(nn.Module):
r"""Pointnet set abstrction layer
Parameters
----------
npoint : int
Number of points
radius : float
Radius of ball
nsample : int
Number of samples in the ball query
mlp : list
Spec of the pointnet before the global max_pool
bn : bool
Use batchnorm
"""
def __init__(self,
*,
mlp: List[int],
npoint: int = None,
radius: float = None,
nsample: int = None,
bn: bool = True):
super().__init__()
self.npoint = npoint
if self.npoint is not None:
assert radius is not None
assert nsample is not None
self.grouper = pointnet2_utils.QueryAndGroup(radius, nsample)
else:
self.grouper = pointnet2_utils.GroupAll()
self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
def forward(self, xyz: torch.Tensor,
points: torch.Tensor = None) -> (torch.Tensor, torch.Tensor):
r"""
Parameters
----------
xyz : torch.Tensor
(B, N, 3) tensor of the xyz coordinates of the points
point : torch.Tensor
(B, N, C) tensor of the descriptors of the the points
Returns
-------
new_xyz : torch.Tensor
(B, npoint, 3) tensor of the new points' xyz
new_points : torch.Tensor
(B, npoint, mlp[-1]) tensor of the new_points descriptors
"""
if self.npoint is not None:
new_xyz = pointnet2_utils.gather_points(
xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint))
else:
new_xyz = xyz.new([[[0, 0, 0]]]).expand(xyz.size(0), 1, 3)
new_points = self.grouper(xyz, new_xyz,
points) # (B, npoint, nsample, 3 + C)
new_points = self.mlp(new_points.permute(
0, 3, 1, 2)) # (B, mlp[-1], npoint, nsample)
new_points = F.max_pool2d(
new_points,
kernel_size=[1, new_points.size(3)]) # (B, mlp[-1], npoint, 1)
new_points = new_points.squeeze(-1) # (B, mlp[-1], npoint)
new_points = new_points.transpose(
1, 2).contiguous() # (B, npoint, mlp[-1])
return new_xyz, new_points
class PointnetFPModule(nn.Module):
r"""Propigates the features of one set to another
Parameters
----------
mlp : list
Pointnet module parameters
bn : bool
Use batchnorm
"""
def __init__(self, *, mlp: List[int], bn: bool = True):
super().__init__()
self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
def forward(self, unknown: torch.Tensor, known: torch.Tensor,
unknow_feats: torch.Tensor,
known_feats: torch.Tensor) -> torch.Tensor:
r"""
Parameters
----------
unknown : torch.Tensor
(B, n, 3) tensor of the xyz positions of the unknown points
known : torch.Tensor
(B, m, 3) tensor of the xyz positions of the known points
unknow_feats : torch.Tensor
(B, n, C1) tensor of the features to be propigated to
known_feats : torch.Tensor
(B, m, C2) tensor of features to be propigated
Returns
-------
new_points : torch.Tensor
(B, n, mlp[-1]) tensor of the features of the unknown points
"""
dist, idx = pointnet2_utils.three_nn(unknown, known)
dist_recip = 1.0 / (dist + 1e-8)
norm = torch.sum(dist_recip, dim=2, keepdim=True)
weight = dist_recip / norm
interpolated_feats = pointnet2_utils.three_interpolate(
known_feats, idx, weight)
if unknow_feats is not None:
new_points = torch.cat(
[interpolated_feats, unknow_feats], dim=-1) #(B, n, C2 + C1)
else:
new_points = interpolated_feats
new_points = new_points.unsqueeze(-1).transpose(1,
2) #(B, C2 + C1, n, 1)
new_points = self.mlp(new_points)
return new_points.squeeze(-1).transpose(
1, 2).contiguous() #(B, n, mlp[-1])
if __name__ == "__main__":
from torch.autograd import Variable
torch.manual_seed(1)
torch.cuda.manual_seed_all(1)
xyz = Variable(torch.randn(2, 10, 3).cuda(), requires_grad=True)
xyz_feats = Variable(torch.randn(2, 10, 6).cuda(), requires_grad=True)
test_module = PointnetSAModuleMSG(
npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]])
test_module.cuda()
print(test_module(xyz, xyz_feats))
# test_module = PointnetFPModule(mlp=[6, 6])
# test_module.cuda()
# from torch.autograd import gradcheck
# inputs = (xyz, xyz, None, xyz_feats)
# test = gradcheck(test_module, inputs, eps=1e-6, atol=1e-4)
# print(test)
for _ in range(1):
_, new_points = test_module(xyz, xyz_feats)
new_points.backward(
torch.cuda.FloatTensor(*new_points.size()).fill_(1))
print(new_points)
print(xyz.grad)
+427
View File
@@ -0,0 +1,427 @@
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn.functional as F
import torch.nn as nn
from linalg_utils import pdist2, PDist2Order
from collections import namedtuple
import _ext as pointnet2
import pytorch_utils as pt_utils
from typing import List, Tuple
class RandomDropout(nn.Module):
def __init__(self, p=0.5, inplace=False):
super().__init__()
self.p = p
self.inplace = inplace
def forward(self, X):
theta = torch.Tensor(1).uniform_(0, self.p)[0]
return pt_utils.feature_dropout_no_scaling(X, theta, self.train,
self.inplace)
class FurthestPointSampling(Function):
@staticmethod
def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
r"""
Uses iterative furthest point sampling to select a set of npoint points that have the largest
minimum distance
Parameters
---------
xyz : torch.Tensor
(B, N, 3) tensor where N > npoint
npoint : int32
number of points in the sampled set
Returns
torch.Tensor
(B, npoint) tensor containing the set
------
"""
B, N, _ = xyz.size()
output = torch.cuda.IntTensor(B, npoint)
temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
xyz = xyz.contiguous()
temp = temp.contiguous()
output = output.contiguous()
pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp,
output)
return output
@staticmethod
def backward(xyz, a=None):
return None, None
furthest_point_sample = FurthestPointSampling.apply
class GatherPoints(Function):
@staticmethod
def forward(ctx, points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
r"""
Uses iterative furthest point sampling to select a set of npoint points that have the largest
minimum distance
Parameters
---------
points : torch.Tensor
(B, N, 3) tensor
idx : torch.Tensor
(B, npoint) tensor of the points to gather
Returns
torch.Tensor
(B, npoint, 3) tensor
------
"""
B, N, C = points.size()
npoint = idx.size(1)
output = torch.cuda.FloatTensor(B, npoint, C)
points = points.contiguous()
idx = idx.contiguous()
output = output.contiguous()
pointnet2.gather_points_wrapper(B, N, C, npoint, points, idx, output)
return output
@staticmethod
def backward(ctx, a=None):
return None, None
gather_points = GatherPoints.apply
class ThreeNN(Function):
@staticmethod
def forward(ctx, unknown: torch.Tensor,
known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
r"""
Find the three nearest neighbors of unknown in known
Parameters
----------
unknown : torch.Tensor
(B, n, 3) tensor of known points
known : torch.Tensor
(B, m, 3) tensor of unknown points
Returns
-------
dist : torch.Tensor
(B, n, 3) l2 distance to the three nearest neighbors
idx : torch.Tensor
(B, n, 3) index of 3 nearest neighbors
"""
B, N, _ = unknown.size()
m = known.size(1)
dist2 = torch.cuda.FloatTensor(B, N, 3)
idx = torch.cuda.IntTensor(B, N, 3)
unknown = unknown.contiguous()
known = known.contiguous()
dist2 = dist2.contiguous()
idx = idx.contiguous()
pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
return torch.sqrt(dist2), idx
@staticmethod
def backward(ctx, a=None, b=None):
return None, None
three_nn = ThreeNN.apply
class ThreeInterpolate(Function):
@staticmethod
def forward(ctx, points: torch.Tensor, idx: torch.Tensor,
weight: torch.Tensor) -> torch.Tensor:
r"""
Performs weight linear interpolation on 3 points
Parameters
----------
points : torch.Tensor
(B, m, c) Points to be interpolated from
idx : torch.Tensor
(B, n, 3) three nearest neighbors of the target points in points
weight : torch.Tensor
(B, n, 3) weights
Returns
-------
torch.Tensor
(B, n, c) tensor of the interpolated points
"""
B, m, c = points.size()
n = idx.size(1)
ctx.three_interpolate_for_backward = (idx, weight, m)
output = torch.cuda.FloatTensor(B, n, c)
points = points.contiguous()
idx = idx.contiguous()
weight = weight.contiguous()
output = output.contiguous()
pointnet2.three_interpolate_wrapper(B, m, c, n, points, idx, weight,
output)
return output
@staticmethod
def backward(ctx, grad_out: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
r"""
Parameters
----------
grad_out : torch.Tensor
(B, n, c) tensor with gradients of ouputs
Returns
-------
grad_points : torch.Tensor
(B, m, c) tensor with gradients of points
None
None
"""
idx, weight, m = ctx.three_interpolate_for_backward
B, n, c = grad_out.size()
grad_points = Variable(torch.cuda.FloatTensor(B, m, c).zero_())
grad_out = grad_out.contiguous()
idx = idx.contiguous()
weight = weight.contiguous()
grad_points = grad_points.contiguous()
pointnet2.three_interpolate_grad_wrapper(B, n, c, m, grad_out.data,
idx, weight, grad_points.data)
return grad_points, None, None
three_interpolate = ThreeInterpolate.apply
class GroupPoints(Function):
@staticmethod
def forward(ctx, points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
r"""
Parameters
----------
points : torch.Tensor
(B, N, C) tensor of points to group
idx : torch.Tensor
(B, npoint, nsample) tensor containing the indicies of points to group with
Returns
-------
torch.Tensor
(B, npoint, nsample, C) tensor
"""
B, npoints, nsample = idx.size()
_, N, C = points.size()
output = torch.cuda.FloatTensor(B, npoints, nsample, C)
points = points.contiguous()
idx = idx.contiguous()
output = output.contiguous()
pointnet2.group_points_wrapper(B, N, C, npoints, nsample, points, idx,
output)
ctx.idx_N_C_for_backward = (idx, N, C)
return output
@staticmethod
def backward(ctx,
grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
r"""
Parameters
----------
grad_out : torch.Tensor
(B, npoint, nsample, C) tensor of the gradients of the output from forward
Returns
-------
torch.Tensor
(B, N, C) gradient of the points
None
"""
idx, N, C = ctx.idx_N_C_for_backward
B, npoint, nsample, _ = grad_out.size()
grad_points = Variable(torch.cuda.FloatTensor(B, N, C).zero_())
grad_out = grad_out.contiguous()
grad_points = grad_points.contiguous()
pointnet2.group_points_grad_wrapper(
B, N, C, npoint, nsample, grad_out.data, idx, grad_points.data)
return grad_points, None
group_points = GroupPoints.apply
class BallQuery(Function):
@staticmethod
def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor,
new_xyz: torch.Tensor) -> torch.Tensor:
r"""
Parameters
---------
radius : float
radius of the balls
nsample : int
maximum number of points in the balls
xyz : torch.Tensor
(B, N, 3) xyz coordinates of the points
new_xyz : torch.Tensor
(B, npoint, 3) centers of the ball query
Returns
------
torch.Tensor
(B, npoint, nsample) tensor with the indicies of the points that form the query balls
"""
B, N, _ = xyz.size()
npoint = new_xyz.size(1)
idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
new_xyz = new_xyz.contiguous()
xyz = xyz.contiguous()
idx = idx.contiguous()
pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz,
xyz, idx)
return idx
@staticmethod
def backward(ctx, a=None):
return None, None, None, None
ball_query = BallQuery.apply
class QueryAndGroup(nn.Module):
r"""
Groups with a ball query of radius
Parameters
---------
radius : float32
Radius of ball
nsample : int32
Maximum number of points to gather in the ball
"""
def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
super().__init__()
self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
def forward(
self,
xyz: torch.Tensor,
new_xyz: torch.Tensor,
points: torch.Tensor = None) -> Tuple[torch.Tensor]:
r"""
Parameters
---------
xyz : torch.Tensor
xyz coordinates of the points (B, N, 3)
new_xyz : torch.Tensor
centriods (B, npoint, 3)
points : torch.Tensor
Descriptors of the points (B, N, C)
Returns
-------
new_points : torch.Tensor
(B, npoint, nsample, 3 + C) tensor
"""
idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
grouped_xyz = group_points(xyz, idx) # (B, npoint, nsample, 3)
grouped_xyz -= new_xyz.unsqueeze(2)
if points is not None:
grouped_points = group_points(points, idx)
if self.use_xyz:
new_points = torch.cat(
[grouped_xyz, grouped_points],
dim=-1) # (B, npoint, nsample, 3 + C)
else:
new_points = group_points
else:
new_points = grouped_xyz
return new_points
class GroupAll(nn.Module):
r"""
Groups all points
Parameters
---------
"""
def __init__(self, use_xyz: bool = True):
super().__init__()
self.use_xyz = use_xyz
def forward(
self,
xyz: torch.Tensor,
new_xyz: torch.Tensor,
points: torch.Tensor = None) -> Tuple[torch.Tensor]:
r"""
Parameters
---------
xyz : torch.Tensor
xyz coordinates of the points (B, N, 3)
new_xyz : torch.Tensor
centriods (B, npoint, 3)
points : torch.Tensor
Descriptors of the points (B, N, C)
Returns
-------
new_points : torch.Tensor
(B, npoint, nsample, 3 + C) tensor
"""
grouped_xyz = xyz.view(xyz.size(0), 1, xyz.size(1), xyz.size(2))
if points is not None:
grouped_points = points.view(points.size(0), 1, points.size(1), points.size(2))
if self.use_xyz:
new_points = torch.cat(
[grouped_xyz, grouped_points],
dim=-1) # (B, npoint, nsample, 3 + C)
else:
new_points = group_points
else:
new_points = grouped_xyz
return new_points
+658
View File
@@ -0,0 +1,658 @@
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.autograd.function import InplaceFunction
from itertools import repeat
import numpy as np
import tensorboard_logger as tb_log
import shutil, os
from tqdm import tqdm
from natsort import natsorted
from operator import itemgetter
from typing import List, Tuple
from scipy.stats import t as student_t
import statistics as stats
import math
class SharedMLP(nn.Sequential):
def __init__(self,
args: List[int],
*,
bn: bool = False,
activation=nn.ReLU(inplace=True),
name: str = ""):
super().__init__()
for i in range(len(args) - 1):
self.add_module(name + 'layer{}'.format(i),
Conv2d(
args[i],
args[i + 1],
bn=bn,
activation=activation))
class _ConvBase(nn.Sequential):
def __init__(self,
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=None,
batch_norm=None,
bias=True,
name=""):
super().__init__()
bias = bias and (not bn)
self.add_module(name + 'conv',
conv(
in_size,
out_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=bias))
init(self[0].weight)
if bias:
nn.init.constant(self[0].bias, 0)
if bn:
self.add_module(name + 'bn', batch_norm(out_size))
nn.init.constant(self[1].weight, 1)
nn.init.constant(self[1].bias, 0)
if activation is not None:
self.add_module(name + 'activation', activation)
class Conv1d(_ConvBase):
def __init__(self,
in_size: int,
out_size: int,
*,
kernel_size: int = 1,
stride: int = 1,
padding: int = 0,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal,
bias: bool = True,
name: str = ""):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv1d,
batch_norm=nn.BatchNorm1d,
bias=bias,
name=name)
class Conv2d(_ConvBase):
def __init__(self,
in_size: int,
out_size: int,
*,
kernel_size: Tuple[int, int] = (1, 1),
stride: Tuple[int, int] = (1, 1),
padding: Tuple[int, int] = (0, 0),
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal,
bias: bool = True,
name: str = ""):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv2d,
batch_norm=nn.BatchNorm2d,
bias=bias,
name=name)
class Conv3d(_ConvBase):
def __init__(self,
in_size: int,
out_size: int,
*,
kernel_size: Tuple[int, int, int] = (1, 1, 1),
stride: Tuple[int, int, int] = (1, 1, 1),
padding: Tuple[int, int, int] = (0, 0, 0),
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal,
bias: bool = True,
name: str = ""):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv3d,
batch_norm=nn.BatchNorm3d,
bias=bias,
name=name)
class FC(nn.Sequential):
def __init__(self,
in_size: int,
out_size: int,
*,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=None,
name: str = ""):
super().__init__()
self.add_module(name + 'fc', nn.Linear(in_size, out_size, bias=not bn))
if init is not None:
init(self[0].weight)
if not bn:
nn.init.constant(self[0].bias, 0)
if bn:
self.add_module(name + 'bn', nn.BatchNorm1d(out_size))
nn.init.constant(self[1].weight, 1)
nn.init.constant(self[1].bias, 0)
if activation is not None:
self.add_module(name + 'activation', activation)
class _DropoutNoScaling(InplaceFunction):
@staticmethod
def _make_noise(input):
return input.new().resize_as_(input)
@staticmethod
def symbolic(g, input, p=0.5, train=False, inplace=False):
if inplace:
return None
n = g.appendNode(
g.create("Dropout", [input]).f_("ratio", p).i_(
"is_test", not train))
real = g.appendNode(g.createSelect(n, 0))
g.appendNode(g.createSelect(n, 1))
return real
@classmethod
def forward(cls, ctx, input, p=0.5, train=False, inplace=False):
if p < 0 or p > 1:
raise ValueError("dropout probability has to be between 0 and 1, "
"but got {}".format(p))
ctx.p = p
ctx.train = train
ctx.inplace = inplace
if ctx.inplace:
ctx.mark_dirty(input)
output = input
else:
output = input.clone()
if ctx.p > 0 and ctx.train:
ctx.noise = cls._make_noise(input)
if ctx.p == 1:
ctx.noise.fill_(0)
else:
ctx.noise.bernoulli_(1 - ctx.p)
ctx.noise = ctx.noise.expand_as(input)
output.mul_(ctx.noise)
return output
@staticmethod
def backward(ctx, grad_output):
if ctx.p > 0 and ctx.train:
return grad_output.mul(Variable(ctx.noise)), None, None, None
else:
return grad_output, None, None, None
dropout_no_scaling = _DropoutNoScaling.apply
class _FeatureDropoutNoScaling(_DropoutNoScaling):
@staticmethod
def symbolic(input, p=0.5, train=False, inplace=False):
return None
@staticmethod
def _make_noise(input):
return input.new().resize_(
input.size(0), input.size(1), *repeat(1,
input.dim() - 2))
feature_dropout_no_scaling = _FeatureDropoutNoScaling.apply
def checkpoint_state(model=None, optimizer=None, best_prec=None, epoch=None):
return {
'epoch':
epoch,
'best_prec':
best_prec,
'model_state':
model.state_dict() if model is not None else None,
'optimizer_state':
optimizer.state_dict() if optimizer is not None else None
}
def save_checkpoint(state,
is_best,
filename='checkpoint',
bestname='model_best'):
filename = '{}.pth.tar'.format(filename)
torch.save(state, filename)
if is_best:
shutil.copyfile(filename, '{}.pth.tar'.format(bestname))
def load_checkpoint(model=None, optimizer=None, filename='checkpoint'):
filename = "{}.pth.tar".format(filename)
if os.path.isfile(filename):
print("==> Loading from checkpoint '{}'".format(filename))
checkpoint = torch.load(filename)
epoch = checkpoint['epoch']
best_prec = checkpoint['best_prec']
if model is not None and checkpoint['model_state'] is not None:
model.load_state_dict(checkpoint['model_state'])
if optimizer is not None and checkpoint['optimizer_state'] is not None:
optimizer.load_state_dict(checkpoint['optimizer_state'])
print("==> Done")
else:
print("==> Checkpoint '{}' not found".format(filename))
return epoch, best_prec
def variable_size_collate(pad_val=0, use_shared_memory=True):
import collections
_numpy_type_map = {
'float64': torch.DoubleTensor,
'float32': torch.FloatTensor,
'float16': torch.HalfTensor,
'int64': torch.LongTensor,
'int32': torch.IntTensor,
'int16': torch.ShortTensor,
'int8': torch.CharTensor,
'uint8': torch.ByteTensor,
}
def wrapped(batch):
"Puts each data field into a tensor with outer dimension batch size"
error_msg = "batch must contain tensors, numbers, dicts or lists; found {}"
elem_type = type(batch[0])
if torch.is_tensor(batch[0]):
max_len = 0
for b in batch:
max_len = max(max_len, b.size(0))
numel = sum([int(b.numel() / b.size(0) * max_len) for b in batch])
if use_shared_memory:
# If we're in a background process, concatenate directly into a
# shared memory tensor to avoid an extra copy
storage = batch[0].storage()._new_shared(numel)
out = batch[0].new(storage)
else:
out = batch[0].new(numel)
out = out.view(
len(batch), max_len,
*[batch[0].size(i) for i in range(1, batch[0].dim())])
out.fill_(pad_val)
for i in range(len(batch)):
out[i, 0:batch[i].size(0)] = batch[i]
return out
elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \
and elem_type.__name__ != 'string_':
elem = batch[0]
if elem_type.__name__ == 'ndarray':
# array of string classes and object
if re.search('[SaUO]', elem.dtype.str) is not None:
raise TypeError(error_msg.format(elem.dtype))
return wrapped([torch.from_numpy(b) for b in batch])
if elem.shape == (): # scalars
py_type = float if elem.dtype.name.startswith('float') else int
return _numpy_type_map[elem.dtype.name](list(
map(py_type, batch)))
elif isinstance(batch[0], int):
return torch.LongTensor(batch)
elif isinstance(batch[0], float):
return torch.DoubleTensor(batch)
elif isinstance(batch[0], collections.Mapping):
return {key: wrapped([d[key] for d in batch]) for key in batch[0]}
elif isinstance(batch[0], collections.Sequence):
transposed = zip(*batch)
return [wrapped(samples) for samples in transposed]
raise TypeError((error_msg.format(type(batch[0]))))
return wrapped
class TrainValSplitter():
r"""
Creates a training and validation split to be used as the sampler in a pytorch DataLoader
Parameters
---------
numel : int
Number of elements in the entire training dataset
percent_train : float
Percentage of data in the training split
shuffled : bool
Whether or not shuffle which data goes to which split
"""
def __init__(self,
*,
numel: int,
percent_train: float,
shuffled: bool = False):
indicies = np.array([i for i in range(numel)])
if shuffled:
np.random.shuffle(indicies)
self.train = torch.utils.data.sampler.SubsetRandomSampler(
indicies[0:int(percent_train * numel)])
self.val = torch.utils.data.sampler.SubsetRandomSampler(
indicies[int(percent_train * numel):-1])
class CrossValSplitter():
r"""
Class that creates cross validation splits. The train and val splits can be used in pytorch DataLoaders. The splits can be updated
by calling next(self) or using a loop:
for _ in self:
....
Parameters
---------
numel : int
Number of elements in the training set
k_folds : int
Number of folds
shuffled : bool
Whether or not to shuffle which data goes in which fold
"""
def __init__(self, *, numel: int, k_folds: int, shuffled: bool = False):
inidicies = np.array([i for i in range(numel)])
if shuffled:
np.random.shuffle(inidicies)
self.folds = np.array(np.array_split(inidicies, k_folds), dtype=object)
self.current_v_ind = -1
self.val = torch.utils.data.sampler.SubsetRandomSampler(self.folds[0])
self.train = torch.utils.data.sampler.SubsetRandomSampler(
np.concatenate(self.folds[1:], axis=0))
self.metrics = {}
def __iter__(self):
self.current_v_ind = -1
return self
def __len__(self):
return len(self.folds)
def __getitem__(self, idx):
assert idx >= 0 and idx < len(self)
self.val.inidicies = self.folds[idx]
self.train.inidicies = np.concatenate(
self.folds[np.arange(len(self)) != idx], axis=0)
def __next__(self):
self.current_v_ind += 1
if self.current_v_ind >= len(self):
raise StopIteration
self[self.current_v_ind]
def update_metrics(self, to_post: dict):
for k, v in to_post.items():
if k in self.metrics:
self.metrics[k].append(v)
else:
self.metrics[k] = [v]
def print_metrics(self):
for name, samples in self.metrics.items():
xbar = stats.mean(samples)
sx = stats.stdev(samples, xbar)
tstar = student_t.ppf(1.0 - 0.025, len(samples) - 1)
margin_of_error = tstar * sx / sqrt(len(samples))
print("{}: {} +/- {}".format(name, xbar, margin_of_error))
def set_bn_momentum_default(bn_momentum):
def fn(m):
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
m.momentum = bn_momentum
return fn
class BNMomentumScheduler(object):
def __init__(self,
model,
bn_lambda,
last_epoch=-1,
setter=set_bn_momentum_default):
if not isinstance(model, nn.Module):
raise RuntimeError("Class '{}' is not a PyTorch nn Module".format(
type(model).__name__))
self.model = model
self.setter = setter
self.lmbd = bn_lambda
self.step(last_epoch + 1)
self.last_epoch = last_epoch
def step(self, epoch=None):
if epoch is None:
epoch = self.last_epoch + 1
self.last_epoch = epoch
self.model.apply(self.setter(self.lmbd(epoch)))
class Trainer(object):
r"""
Reasonably generic trainer for pytorch models
Parameters
----------
model : pytorch model
Model to be trained
model_fn : function (model, inputs, labels) -> preds, loss, accuracy
optimizer : torch.optim
Optimizer for model
checkpoint_name : str
Name of file to save checkpoints to
best_name : str
Name of file to save best model to
lr_scheduler : torch.optim.lr_scheduler
Learning rate scheduler. .step() will be called at the start of every epoch
bnm_scheduler : BNMomentumScheduler
Batchnorm momentum scheduler. .step() will be called at the start of every epoch
eval_frequency : int
How often to run an eval
log_name : str
Name of file to output tensorboard_logger to
"""
def __init__(self,
model,
model_fn,
optimizer,
checkpoint_name="ckpt",
best_name="best",
lr_scheduler=None,
bnm_scheduler=None,
eval_frequency=1,
log_name=None):
self.model, self.model_fn, self.optimizer, self.lr_scheduler, self.bnm_scheduler = (
model, model_fn, optimizer, lr_scheduler, bnm_scheduler)
self.checkpoint_name, self.best_name = checkpoint_name, best_name
self.eval_frequency = eval_frequency
if log_name is not None:
tb_log.configure(log_name)
self.logging = True
else:
self.logging = False
@staticmethod
def _print(mode, epoch, loss, eval_dict, count):
to_print = "[{:d}] {}\tMean Loss: {:.4e}".format(
epoch, mode, loss / count)
for k, v in natsorted(eval_dict.items(), key=itemgetter(0)):
to_print += "\tMean {}: {:2.3f}%".format(k, stats.mean(v) * 1e2)
print(to_print)
def _train_epoch(self, epoch, d_loader):
self.model.train()
total_loss = 0.0
count = 0.0
eval_dict = {}
for i, data in tqdm(enumerate(d_loader, 0), total=len(d_loader)):
if self.lr_scheduler is not None:
self.lr_scheduler.step(epoch - 1 + i / len(d_loader))
if self.bnm_scheduler is not None:
self.bnm_scheduler.step(epoch - 1 + i / len(d_loader))
self.optimizer.zero_grad()
_, loss, eval_res = self.model_fn(self.model, data, epoch=epoch)
loss.backward()
self.optimizer.step()
total_loss += loss.data[0]
for k, v in eval_res.items():
if v is not None:
eval_dict[k] = eval_dict.get(k, []) + [v]
count += 1.0
if self.logging:
idx = (epoch - 1) * len(d_loader) + i
tb_log.log_value("Training loss", loss.data[0], step=idx)
for k, v in eval_res.items():
if v is not None:
tb_log.log_value(
"Training {}".format(k), 1.0 - v, step=idx)
d_loader.dataset.randomize()
self._print("Train", epoch, total_loss, eval_dict, count)
def eval_epoch(self, epoch, d_loader):
if d_loader is None:
return
self.model.eval()
total_loss = 0.0
eval_dict = {}
count = 0.0
for i, data in tqdm(enumerate(d_loader, 0), total=len(d_loader)):
self.optimizer.zero_grad()
_, loss, eval_res = self.model_fn(
self.model, data, eval=True, epoch=epoch)
total_loss += loss.data[0]
count += 1
for k, v in eval_res.items():
if v is not None:
eval_dict[k] = eval_dict.get(k, []) + [v]
if self.logging:
idx = (epoch - 1) * len(d_loader) + i
tb_log.log_value("Eval loss", loss.data[0], step=idx)
for k, v in eval_res.items():
if v is not None:
tb_log.log_value(
"Eval {}".format(k), 1.0 - v, step=idx)
d_loader.dataset.randomize()
self._print("Eval", epoch, total_loss, eval_dict, count)
return total_loss / count, eval_dict
def train(self,
start_epoch,
n_epochs,
train_loader,
test_loader=None,
best_loss=0.0):
r"""
Call to begin training the model
Parameters
----------
start_epoch : int
Epoch to start at
n_epochs : int
Number of epochs to train for
test_loader : torch.utils.data.DataLoader
DataLoader of the test_data
train_loader : torch.utils.data.DataLoader
DataLoader of training data
best_loss : float
Testing loss of the best model
"""
for epoch in range(start_epoch, n_epochs + 1):
print("\n{0} Train Epoch {1:0>3d} {0}\n".format("-" * 5, epoch))
self._train_epoch(epoch, train_loader)
if test_loader is not None and (epoch % self.eval_frequency) == 0:
print("\n{0} Eval Epoch {1:0>3d} {0}\n".format("-" * 5, epoch))
val_loss, _ = self.eval_epoch(epoch, test_loader)
is_best = val_loss < best_loss
best_loss = min(best_loss, val_loss)
save_checkpoint(
checkpoint_state(self.model, self.optimizer, val_loss,
epoch),
is_best,
filename=self.checkpoint_name,
bestname=self.best_name)
return best_loss