diff --git a/CMakeLists.txt b/CMakeLists.txt
index dfa20f4..09c16c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@ cuda_compile(cuda_kernels SHARED ${cuda_kernels_src} OPTIONS -O3)
 set(BUILD_CMD python "${CMAKE_CURRENT_SOURCE_DIR}/utils/build_ffi.py")
 file(GLOB wrapper_headers "${CMAKE_CURRENT_SOURCE_DIR}/utils/cinclude/*wrapper.h")
 file(GLOB wrapper_sources "${CMAKE_CURRENT_SOURCE_DIR}/utils/csrs/*.c")
-add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/__ext.so"
+add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so"
 		   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/utils
 		   COMMAND ${BUILD_CMD} --build --objs ${cuda_kernels}
 		   DEPENDS ${cuda_kernels}
@@ -20,6 +20,6 @@ add_custom_command(OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/__ext.so"
 		   VERBATIM)
 
 add_custom_target(ext ALL
-		  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/__ext.so")
+		  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext/pointnet2/_pointnet2.so")
 
 set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/_ext")
diff --git a/data/Indoor3DSemSegLoader.py b/data/Indoor3DSemSegLoader.py
index f58a9a3..94569e9 100644
--- a/data/Indoor3DSemSegLoader.py
+++ b/data/Indoor3DSemSegLoader.py
@@ -16,12 +16,10 @@ def _load_data_file(name):
 
 
 class Indoor3DSemSeg(data.Dataset):
-    def __init__(self,
-                 num_points,
-                 root,
-                 train=True,
-                 download=True,
-                 data_precent=1.0):
+
+    def __init__(
+            self, num_points, root, train=True, download=True, data_precent=1.0
+    ):
         super().__init__()
         self.data_precent = data_precent
         root = os.path.abspath(root)
@@ -32,18 +30,23 @@ class Indoor3DSemSeg(data.Dataset):
         if download and not os.path.exists(self.data_dir):
             zipfile = os.path.join(root, os.path.basename(self.url))
             subprocess.check_call(
-                shlex.split("curl {} -o {}".format(self.url, zipfile)))
+                shlex.split("curl {} -o {}".format(self.url, zipfile))
+            )
 
-            subprocess.check_call(shlex.split("unzip {} -d {}".format(zipfile, root)))
+            subprocess.check_call(
+                shlex.split("unzip {} -d {}".format(zipfile, root))
+            )
 
             subprocess.check_call(shlex.split("rm {}".format(zipfile)))
 
         self.train, self.num_points = train, num_points
 
         all_files = _get_data_files(
-            os.path.join(self.data_dir, "all_files.txt"))
+            os.path.join(self.data_dir, "all_files.txt")
+        )
         room_filelist = _get_data_files(
-            os.path.join(self.data_dir, "room_filelist.txt"))
+            os.path.join(self.data_dir, "room_filelist.txt")
+        )
 
         data_batchlist, label_batchlist = [], []
         for f in all_files:
@@ -74,9 +77,11 @@ class Indoor3DSemSeg(data.Dataset):
         np.random.shuffle(pt_idxs)
 
         current_points = torch.from_numpy(self.points[idx, pt_idxs, :]).type(
-            torch.FloatTensor)
+            torch.FloatTensor
+        )
         current_labels = torch.from_numpy(self.labels[idx, pt_idxs]).type(
-            torch.LongTensor)
+            torch.LongTensor
+        )
 
         return current_points, current_labels
 
diff --git a/data/ModelNet40Loader.py b/data/ModelNet40Loader.py
index c237d4e..3f98719 100644
--- a/data/ModelNet40Loader.py
+++ b/data/ModelNet40Loader.py
@@ -19,12 +19,10 @@ def _load_data_file(name):
 
 
 class ModelNet40Cls(data.Dataset):
-    def __init__(self,
-                 num_points,
-                 root,
-                 transforms=None,
-                 train=True,
-                 download=True):
+
+    def __init__(
+            self, num_points, root, transforms=None, train=True, download=True
+    ):
         super().__init__()
 
         self.transforms = transforms
@@ -37,9 +35,12 @@ class ModelNet40Cls(data.Dataset):
         if download and not os.path.exists(self.data_dir):
             zipfile = os.path.join(root, os.path.basename(self.url))
             subprocess.check_call(
-                shlex.split("curl {} -o {}".format(self.url, zipfile)))
+                shlex.split("curl {} -o {}".format(self.url, zipfile))
+            )
 
-            subprocess.check_call(shlex.split("unzip {} -d {}".format(zipfile, root)))
+            subprocess.check_call(
+                shlex.split("unzip {} -d {}".format(zipfile, root))
+            )
 
             subprocess.check_call(shlex.split("rm {}".format(zipfile)))
 
@@ -83,9 +84,10 @@ class ModelNet40Cls(data.Dataset):
     def randomize(self):
         self.actual_number_of_points = min(
             max(
-                np.random.randint(self.num_points * 0.8,
-                                  self.num_points * 1.2), 1),
-            self.points.shape[1])
+                np.random.randint(self.num_points * 0.8, self.num_points * 1.2),
+                1
+            ), self.points.shape[1]
+        )
 
 
 if __name__ == "__main__":
diff --git a/models/Pointnet2Cls.py b/models/Pointnet2Cls.py
index b598626..29263e2 100644
--- a/models/Pointnet2Cls.py
+++ b/models/Pointnet2Cls.py
@@ -39,6 +39,7 @@ def model_fn_decorator(criterion):
 
 
 class Pointnet2SSG(nn.Module):
+
     def __init__(self, num_classes, input_channels=9):
         super().__init__()
 
@@ -48,13 +49,17 @@ class Pointnet2SSG(nn.Module):
                 npoint=512,
                 radius=0.2,
                 nsample=64,
-                mlp=[input_channels, 64, 64, 128]))
+                mlp=[input_channels, 64, 64, 128]
+            )
+        )
         self.SA_modules.append(
             PointnetSAModule(
                 npoint=128,
                 radius=0.4,
                 nsample=64,
-                mlp=[128 + 3, 128, 128, 256]))
+                mlp=[128 + 3, 128, 128, 256]
+            )
+        )
         self.SA_modules.append(PointnetSAModule(mlp=[256 + 3, 256, 512, 1024]))
 
         self.FC_layer = nn.Sequential(
@@ -62,7 +67,8 @@ class Pointnet2SSG(nn.Module):
             nn.Dropout(p=0.5),
             pt_utils.FC(512, 256, bn=True),
             nn.Dropout(p=0.5),
-            pt_utils.FC(256, num_classes, activation=None))
+            pt_utils.FC(256, num_classes, activation=None)
+        )
 
     def forward(self, xyz, points=None):
         for module in self.SA_modules:
@@ -72,6 +78,7 @@ class Pointnet2SSG(nn.Module):
 
 
 class Pointnet2MSG(nn.Module):
+
     def __init__(self, num_classes, input_channels=9):
         super().__init__()
 
@@ -83,7 +90,9 @@ class Pointnet2MSG(nn.Module):
                 nsamples=[32, 64, 128],
                 mlps=[[input_channels, 32, 32,
                        64], [input_channels, 64, 64, 128],
-                      [input_channels, 64, 96, 128]]))
+                      [input_channels, 64, 96, 128]]
+            )
+        )
 
         input_channels = 64 + 128 + 128 + 3
         self.SA_modules.append(
@@ -92,17 +101,21 @@ class Pointnet2MSG(nn.Module):
                 radii=[0.2, 0.4, 0.8],
                 nsamples=[16, 32, 64],
                 mlps=[[input_channels, 64, 64,
-                      128], [input_channels, 128, 128, 256],
-                     [input_channels, 128, 128, 256]]))
+                       128], [input_channels, 128, 128, 256],
+                      [input_channels, 128, 128, 256]]
+            )
+        )
         self.SA_modules.append(
-            PointnetSAModule(mlp=[128 + 256 + 256 + 3, 256, 512, 1024]))
+            PointnetSAModule(mlp=[128 + 256 + 256 + 3, 256, 512, 1024])
+        )
 
         self.FC_layer = nn.Sequential(
             pt_utils.FC(1024, 512, bn=True),
             nn.Dropout(p=0.5),
             pt_utils.FC(512, 256, bn=True),
             nn.Dropout(p=0.5),
-            pt_utils.FC(256, num_classes, activation=None))
+            pt_utils.FC(256, num_classes, activation=None)
+        )
 
     def forward(self, xyz, points=None):
         for module in self.SA_modules:
diff --git a/models/Pointnet2SemSeg.py b/models/Pointnet2SemSeg.py
index b076331..6ebaaa6 100644
--- a/models/Pointnet2SemSeg.py
+++ b/models/Pointnet2SemSeg.py
@@ -38,6 +38,7 @@ def model_fn_decorator(criterion):
 
 
 class Pointnet2SSG(nn.Module):
+
     def __init__(self, num_classes, input_channels=9):
         super().__init__()
 
@@ -49,32 +50,37 @@ class Pointnet2SSG(nn.Module):
                 npoint=1024,
                 radius=0.1,
                 nsample=32,
-                mlp=[input_channels, 32, 32, 64]))
+                mlp=[input_channels, 32, 32, 64]
+            )
+        )
         self.SA_modules.append(
             PointnetSAModule(
-                npoint=256, radius=0.2, nsample=32, mlp=[64 + 3, 64, 64, 128]))
+                npoint=256, radius=0.2, nsample=32, mlp=[64 + 3, 64, 64, 128]
+            )
+        )
         self.SA_modules.append(
             PointnetSAModule(
-                npoint=64,
-                radius=0.4,
-                nsample=32,
-                mlp=[128 + 3, 128, 128, 256]))
+                npoint=64, radius=0.4, nsample=32, mlp=[128 + 3, 128, 128, 256]
+            )
+        )
         self.SA_modules.append(
             PointnetSAModule(
-                npoint=16,
-                radius=0.8,
-                nsample=32,
-                mlp=[256 + 3, 256, 256, 512]))
+                npoint=16, radius=0.8, nsample=32, mlp=[256 + 3, 256, 256, 512]
+            )
+        )
 
         self.FP_modules = nn.ModuleList()
-        self.FP_modules.append(PointnetFPModule(mlp=[128 + input_channels - 3, 128, 128, 128]))
+        self.FP_modules.append(
+            PointnetFPModule(mlp=[128 + input_channels - 3, 128, 128, 128])
+        )
         self.FP_modules.append(PointnetFPModule(mlp=[256 + 64, 256, 128]))
         self.FP_modules.append(PointnetFPModule(mlp=[256 + 128, 256, 256]))
         self.FP_modules.append(PointnetFPModule(mlp=[512 + 256, 256, 256]))
 
         self.FC_layer = nn.Sequential(
             pt_utils.Conv1d(128, 128, bn=True), nn.Dropout(),
-            pt_utils.Conv1d(128, num_classes, activation=None))
+            pt_utils.Conv1d(128, num_classes, activation=None)
+        )
 
     def forward(self, xyz, points=None):
         if points is not None:
@@ -91,13 +97,17 @@ class Pointnet2SSG(nn.Module):
             l_points.append(li_points)
 
         for i in range(-1, -(len(self.FP_modules + 1) - 1), -1):
-            l_points[i - 1] = self.FP_modules[i](l_xyz[i - 1], l_xyz[i],
-                                                 l_points[i - 1], l_points[i])
+            l_points[i - 1] = self.FP_modules[i](
+                l_xyz[i - 1], l_xyz[i], l_points[i - 1], l_points[i]
+            )
 
-        return self.FC_layer(l_points[0].transpose(1, 2)).transpose(1, 2).contiguous()
+        fc_input = l_points[0].transpose(1, 2)
+        fc_output = self.FC_layer(fc_input)
+        return fc_output.transpose(1, 2).contiguous()
 
 
 class Pointnet2MSG(nn.Module):
+
     def __init__(self, num_classes, input_channels=9):
         super().__init__()
 
@@ -111,7 +121,9 @@ class Pointnet2MSG(nn.Module):
                 npoint=1024,
                 radii=[0.05, 0.1],
                 nsamples=[16, 32],
-                mlps=[[c_in, 16, 16, 32], [c_in, 32, 32, 64]]))
+                mlps=[[c_in, 16, 16, 32], [c_in, 32, 32, 64]]
+            )
+        )
         c_out_0 = 32 + 64
 
         c_in = c_out_0 + 3
@@ -120,7 +132,9 @@ class Pointnet2MSG(nn.Module):
                 npoint=256,
                 radii=[0.1, 0.2],
                 nsamples=[16, 32],
-                mlps=[[c_in, 64, 64, 128], [c_in, 64, 96, 128]]))
+                mlps=[[c_in, 64, 64, 128], [c_in, 64, 96, 128]]
+            )
+        )
         c_out_1 = 128 + 128
 
         c_in = c_out_1 + 3
@@ -129,7 +143,9 @@ class Pointnet2MSG(nn.Module):
                 npoint=64,
                 radii=[0.2, 0.4],
                 nsamples=[16, 32],
-                mlps=[[c_in, 128, 196, 256], [c_in, 128, 196, 256]]))
+                mlps=[[c_in, 128, 196, 256], [c_in, 128, 196, 256]]
+            )
+        )
         c_out_2 = 256 + 256
 
         c_in = c_out_2 + 3
@@ -138,20 +154,25 @@ class Pointnet2MSG(nn.Module):
                 npoint=16,
                 radii=[0.4, 0.8],
                 nsamples=[16, 32],
-                mlps=[[c_in, 256, 256, 512], [c_in, 256, 384, 512]]))
+                mlps=[[c_in, 256, 256, 512], [c_in, 256, 384, 512]]
+            )
+        )
         c_out_3 = 512 + 512
 
         self.FP_modules = nn.ModuleList()
         self.FP_modules.append(
-            PointnetFPModule(mlp=[256 + input_channels - 3, 128, 128]))
+            PointnetFPModule(mlp=[256 + input_channels - 3, 128, 128])
+        )
         self.FP_modules.append(PointnetFPModule(mlp=[512 + c_out_0, 256, 256]))
         self.FP_modules.append(PointnetFPModule(mlp=[512 + c_out_1, 512, 512]))
         self.FP_modules.append(
-            PointnetFPModule(mlp=[c_out_3 + c_out_2, 512, 512]))
+            PointnetFPModule(mlp=[c_out_3 + c_out_2, 512, 512])
+        )
 
         self.FC_layer = nn.Sequential(
             pt_utils.Conv1d(128, 128, bn=True), nn.Dropout(),
-            pt_utils.Conv1d(128, num_classes, activation=None))
+            pt_utils.Conv1d(128, num_classes, activation=None)
+        )
 
     def forward(self, xyz, points=None):
         if points is not None and self.initial_dropout is not None:
@@ -167,10 +188,13 @@ class Pointnet2MSG(nn.Module):
             l_points.append(li_points)
 
         for i in range(-1, -(len(self.FP_modules) + 1), -1):
-            l_points[i - 1] = self.FP_modules[i](l_xyz[i - 1], l_xyz[i],
-                                                 l_points[i - 1], l_points[i])
+            l_points[i - 1] = self.FP_modules[i](
+                l_xyz[i - 1], l_xyz[i], l_points[i - 1], l_points[i]
+            )
 
-        return self.FC_layer(l_points[0].transpose(1, 2)).transpose(1, 2).contiguous()
+        fc_input = l_points[0].transpose(1, 2)
+        fc_output = self.FC_layer(fc_input)
+        return fc_output.transpose(1, 2).contiguous()
 
 
 if __name__ == "__main__":
diff --git a/train_cls.py b/train_cls.py
index 887adb5..8ced965 100644
--- a/train_cls.py
+++ b/train_cls.py
@@ -16,56 +16,62 @@ import utils.pytorch_utils as pt_utils
 import utils.data_utils as d_utils
 import argparse
 
+
 def parse_args():
     parser = argparse.ArgumentParser(
         description="Arguments for cls training",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument(
-        "-batch_size", type=int, default=16, help="Batch size")
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("-batch_size", type=int, default=16, help="Batch size")
     parser.add_argument(
         "-num_points",
         type=int,
         default=1024,
-        help="Number of points to train with")
+        help="Number of points to train with"
+    )
     parser.add_argument(
-        "-weight_decay", type=float, default=1e-5, help="L2 regularization coeff")
-    parser.add_argument(
-        "-lr",
+        "-weight_decay",
         type=float,
-        default=1e-2,
-        help="Initial learning rate")
+        default=1e-5,
+        help="L2 regularization coeff"
+    )
     parser.add_argument(
-        "-lr_decay",
-        type=float,
-        default=0.7,
-        help="Learning rate decay gamma")
+        "-lr", type=float, default=1e-2, help="Initial learning rate"
+    )
     parser.add_argument(
-        "-decay_step",
-        type=int,
-        default=20,
-        help="Learning rate decay step")
+        "-lr_decay", type=float, default=0.7, help="Learning rate decay gamma"
+    )
+    parser.add_argument(
+        "-decay_step", type=int, default=20, help="Learning rate decay step"
+    )
     parser.add_argument(
         "-bn_momentum",
         type=float,
         default=0.5,
-        help="Initial batch norm momentum")
+        help="Initial batch norm momentum"
+    )
     parser.add_argument(
         "-bnm_decay",
         type=float,
         default=0.5,
-        help="Batch norm momentum decay gamma")
+        help="Batch norm momentum decay gamma"
+    )
     parser.add_argument(
-        "-checkpoint", type=str, default=None, help="Checkpoint to start from")
+        "-checkpoint", type=str, default=None, help="Checkpoint to start from"
+    )
     parser.add_argument(
-        "-epochs", type=int, default=200, help="Number of epochs to train for")
+        "-epochs", type=int, default=200, help="Number of epochs to train for"
+    )
     parser.add_argument(
         "-run_name",
         type=str,
         default="cls_run_1",
-        help="Name for run in tensorboard_logger")
+        help="Name for run in tensorboard_logger"
+    )
 
     return parser.parse_args()
 
+
 lr_clip = 1e-5
 bnm_clip = 1e-2
 
@@ -82,13 +88,15 @@ if __name__ == "__main__":
     ])
 
     test_set = ModelNet40Cls(
-        args.num_points, BASE_DIR, transforms=transforms, train=False)
+        args.num_points, BASE_DIR, transforms=transforms, train=False
+    )
     test_loader = DataLoader(
         test_set,
         batch_size=args.batch_size,
         shuffle=True,
         num_workers=2,
-        pin_memory=True)
+        pin_memory=True
+    )
 
     train_set = ModelNet40Cls(args.num_points, BASE_DIR, transforms=transforms)
     train_loader = DataLoader(
@@ -96,25 +104,30 @@ if __name__ == "__main__":
         batch_size=args.batch_size,
         shuffle=True,
         num_workers=2,
-        pin_memory=True)
+        pin_memory=True
+    )
 
     tb_log.configure('runs/{}'.format(args.run_name))
 
     model = Pointnet(input_channels=3, num_classes=40)
     model.cuda()
     optimizer = optim.Adam(
-        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
+        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
+    )
     lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), lr_clip / args.lr)
     bn_lbmd = lambda e: max(args.bn_momentum * args.bnm_decay**(e // args.decay_step), bnm_clip)
 
     if args.checkpoint is not None:
         start_epoch, best_loss = pt_utils.load_checkpoint(
-            model, optimizer, filename=args.checkpoint.split(".")[0])
+            model, optimizer, filename=args.checkpoint.split(".")[0]
+        )
 
         lr_scheduler = lr_sched.LambdaLR(
-            optimizer, lr_lambda=lr_lbmd, last_epoch=start_epoch)
+            optimizer, lr_lambda=lr_lbmd, last_epoch=start_epoch
+        )
         bnm_scheduler = pt_utils.BNMomentumScheduler(
-            model, bn_lambda=bn_lbmd, last_epoch=start_epoch)
+            model, bn_lambda=bn_lbmd, last_epoch=start_epoch
+        )
     else:
         lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lambda=lr_lbmd)
         bnm_scheduler = pt_utils.BNMomentumScheduler(model, bn_lambda=bn_lbmd)
@@ -131,14 +144,16 @@ if __name__ == "__main__":
         checkpoint_name="cls_checkpoint",
         best_name="cls_best",
         lr_scheduler=lr_scheduler,
-        bnm_scheduler=bnm_scheduler)
+        bnm_scheduler=bnm_scheduler
+    )
 
     trainer.train(
         start_epoch,
         args.epochs,
         train_loader,
         test_loader,
-        best_loss=best_loss)
+        best_loss=best_loss
+    )
 
     if start_epoch == args.epochs:
         _ = trainer.eval_epoch(start_epoch, test_loader)
diff --git a/train_sem_seg.py b/train_sem_seg.py
index 76d1cb0..995e9d0 100644
--- a/train_sem_seg.py
+++ b/train_sem_seg.py
@@ -18,51 +18,62 @@ import argparse
 
 parser = argparse.ArgumentParser(description="Arg parser")
 parser.add_argument(
-    "-batch_size", type=int, default=32, help="Batch size [default: 32]")
+    "-batch_size", type=int, default=32, help="Batch size [default: 32]"
+)
 parser.add_argument(
     "-num_points",
     type=int,
     default=2048,
-    help="Number of points to train with [default: 2048]")
+    help="Number of points to train with [default: 2048]"
+)
 parser.add_argument(
     "-weight_decay",
     type=float,
     default=0,
-    help="L2 regularization coeff [default: 0.0]")
+    help="L2 regularization coeff [default: 0.0]"
+)
 parser.add_argument(
     "-lr",
     type=float,
     default=1e-2,
-    help="Initial learning rate [default: 1e-2]")
+    help="Initial learning rate [default: 1e-2]"
+)
 parser.add_argument(
     "-lr_decay",
     type=float,
     default=0.5,
-    help="Learning rate decay gamma [default: 0.5]")
+    help="Learning rate decay gamma [default: 0.5]"
+)
 parser.add_argument(
     "-decay_step",
     type=int,
     default=20,
-    help="Learning rate decay step [default: 20]")
+    help="Learning rate decay step [default: 20]"
+)
 parser.add_argument(
     "-bn_momentum",
     type=float,
     default=0.9,
-    help="Initial batch norm momentum [default: 0.9]")
+    help="Initial batch norm momentum [default: 0.9]"
+)
 parser.add_argument(
     "-bn_decay",
     type=float,
     default=0.5,
-    help="Batch norm momentum decay gamma [default: 0.5]")
+    help="Batch norm momentum decay gamma [default: 0.5]"
+)
 parser.add_argument(
-    "-checkpoint", type=str, default=None, help="Checkpoint to start from")
+    "-checkpoint", type=str, default=None, help="Checkpoint to start from"
+)
 parser.add_argument(
-    "-epochs", type=int, default=200, help="Number of epochs to train for")
+    "-epochs", type=int, default=200, help="Number of epochs to train for"
+)
 parser.add_argument(
     "-run_name",
     type=str,
     default="sem_seg_run_1",
-    help="Name for run in tensorboard_logger")
+    help="Name for run in tensorboard_logger"
+)
 
 BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
 
@@ -74,13 +85,15 @@ if __name__ == "__main__":
     tb_log.configure('runs/{}'.format(args.run_name))
 
     test_set = Indoor3DSemSeg(
-        args.num_points, BASE_DIR, train=False, data_precent=0.01)
+        args.num_points, BASE_DIR, train=False, data_precent=0.01
+    )
     test_loader = DataLoader(
         test_set,
         batch_size=args.batch_size,
         shuffle=True,
         pin_memory=True,
-        num_workers=2)
+        num_workers=2
+    )
 
     train_set = Indoor3DSemSeg(args.num_points, BASE_DIR, data_precent=1.0)
     train_loader = DataLoader(
@@ -88,12 +101,14 @@ if __name__ == "__main__":
         batch_size=args.batch_size,
         pin_memory=True,
         num_workers=2,
-        shuffle=True)
+        shuffle=True
+    )
 
     model = Pointnet(num_classes=13)
     model.cuda()
     optimizer = optim.Adam(
-        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
+        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
+    )
 
     lr_lbmd = lambda e: max(args.lr_decay**(e // args.decay_step), lr_clip / args.lr)
     bnm_lmbd = lambda e: max(args.bn_momentum * args.bn_decay**(e // args.decay_step), bnm_clip)
@@ -106,12 +121,15 @@ if __name__ == "__main__":
         best_loss = 1e10
     else:
         start_epoch, best_loss = pt_utils.load_checkpoint(
-            model, optimizer, filename=args.checkpoint.split(".")[0])
+            model, optimizer, filename=args.checkpoint.split(".")[0]
+        )
 
         lr_scheduler = lr_sched.LambdaLR(
-            optimizer, lr_lbmd, last_epoch=start_epoch)
+            optimizer, lr_lbmd, last_epoch=start_epoch
+        )
         bnm_scheduler = pt_utils.BNMomentumScheduler(
-            model, bnm_lmbd, last_epoch=start_epoch)
+            model, bnm_lmbd, last_epoch=start_epoch
+        )
 
     model_fn = model_fn_decorator(nn.CrossEntropyLoss())
 
@@ -123,14 +141,16 @@ if __name__ == "__main__":
         best_name="sem_seg_best",
         lr_scheduler=lr_scheduler,
         bnm_scheduler=bnm_scheduler,
-        eval_frequency=10)
+        eval_frequency=10
+    )
 
     trainer.train(
         start_epoch,
         args.epochs,
         train_loader,
         test_loader,
-        best_loss=best_loss)
+        best_loss=best_loss
+    )
 
     if start_epoch == args.epochs:
         test_loader.dataset.data_precent = 1.0
diff --git a/utils/build_ffi.py b/utils/build_ffi.py
index 983378c..e893f4f 100644
--- a/utils/build_ffi.py
+++ b/utils/build_ffi.py
@@ -9,7 +9,8 @@ base_dir = osp.dirname(osp.abspath(__file__))
 
 def parse_args():
     parser = argparse.ArgumentParser(
-        description="Arguments for building pointnet2 ffi extension")
+        description="Arguments for building pointnet2 ffi extension"
+    )
     parser.add_argument("--objs", nargs="*")
     clean_arg = parser.add_mutually_exclusive_group()
     clean_arg.add_argument("--build", dest='build', action="store_true")
@@ -27,7 +28,7 @@ def build(args):
     extra_objects += [a for a in glob.glob('/usr/local/cuda/lib64/*.a')]
 
     ffi = create_extension(
-        '_ext',
+        '_ext.pointnet2',
         headers=[a for a in glob.glob("cinclude/*_wrapper.h")],
         sources=[a for a in glob.glob("csrc/*.c")],
         define_macros=[('WITH_CUDA', None)],
@@ -36,12 +37,15 @@ def build(args):
         extra_objects=extra_objects,
         include_dirs=[osp.join(base_dir, 'cinclude')],
         verbose=False,
-        package=False)
+        package=False
+    )
     ffi.build()
 
+
 def clean(args):
     shutil.rmtree(osp.join(base_dir, "_ext"))
 
+
 if __name__ == "__main__":
     args = parse_args()
     if args.clean:
diff --git a/utils/csrc/ball_query.c b/utils/csrc/ball_query.c
index aad1a35..2af2d03 100644
--- a/utils/csrc/ball_query.c
+++ b/utils/csrc/ball_query.c
@@ -8,13 +8,13 @@ int ball_query_wrapper(int b, int n, int m, float radius, int nsample,
 		       THCudaTensor *new_xyz_tensor, THCudaTensor *xyz_tensor,
 		       THCudaIntTensor *idx_tensor) {
 
-	const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor);
-	const float *xyz = THCudaTensor_data(state, xyz_tensor);
-	int *idx = THCudaIntTensor_data(state, idx_tensor);
+    const float *new_xyz = THCudaTensor_data(state, new_xyz_tensor);
+    const float *xyz = THCudaTensor_data(state, xyz_tensor);
+    int *idx = THCudaIntTensor_data(state, idx_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	query_ball_point_kernel_wrapper(b, n, m, radius, nsample, new_xyz, xyz,
-					idx, stream);
-	return 1;
+    query_ball_point_kernel_wrapper(b, n, m, radius, nsample, new_xyz, xyz, idx,
+				    stream);
+    return 1;
 }
diff --git a/utils/csrc/ball_query_gpu.cu b/utils/csrc/ball_query_gpu.cu
index 215d756..a8f1170 100644
--- a/utils/csrc/ball_query_gpu.cu
+++ b/utils/csrc/ball_query_gpu.cu
@@ -11,38 +11,37 @@ __global__ void query_ball_point_kernel(int b, int n, int m, float radius,
 					int nsample,
 					const float *__restrict__ new_xyz,
 					const float *__restrict__ xyz,
-					int * __restrict__ idx) {
-	int batch_index = blockIdx.x;
-	xyz += batch_index * n * 3;
-	new_xyz += batch_index * m * 3;
-	idx += m * nsample * batch_index;
+					int *__restrict__ idx) {
+    int batch_index = blockIdx.x;
+    xyz += batch_index * n * 3;
+    new_xyz += batch_index * m * 3;
+    idx += m * nsample * batch_index;
 
-	int index = threadIdx.x;
-	int stride = blockDim.x;
+    int index = threadIdx.x;
+    int stride = blockDim.x;
 
-	float radius2 = radius * radius;
-	for (int j = index; j < m; j += stride) {
-		float new_x = new_xyz[j * 3 + 0];
-		float new_y = new_xyz[j * 3 + 1];
-		float new_z = new_xyz[j * 3 + 2];
-		for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
-			float x = xyz[k * 3 + 0];
-			float y = xyz[k * 3 + 1];
-			float z = xyz[k * 3 + 2];
-			float d2 = (new_x - x) * (new_x - x) +
-				   (new_y - y) * (new_y - y) +
-				   (new_z - z) * (new_z - z);
-			if (d2 < radius2) {
-				if (cnt == 0) {
-					for (int l = 0; l < nsample; ++l) {
-						idx[j * nsample + l] = k;
-					}
-				}
-				idx[j * nsample + cnt] = k;
-				++cnt;
-			}
+    float radius2 = radius * radius;
+    for (int j = index; j < m; j += stride) {
+	float new_x = new_xyz[j * 3 + 0];
+	float new_y = new_xyz[j * 3 + 1];
+	float new_z = new_xyz[j * 3 + 2];
+	for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) {
+	    float x = xyz[k * 3 + 0];
+	    float y = xyz[k * 3 + 1];
+	    float z = xyz[k * 3 + 2];
+	    float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) +
+		       (new_z - z) * (new_z - z);
+	    if (d2 < radius2) {
+		if (cnt == 0) {
+		    for (int l = 0; l < nsample; ++l) {
+			idx[j * nsample + l] = k;
+		    }
 		}
+		idx[j * nsample + cnt] = k;
+		++cnt;
+	    }
 	}
+    }
 }
 
 void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
@@ -50,14 +49,13 @@ void query_ball_point_kernel_wrapper(int b, int n, int m, float radius,
 				     const float *xyz, int *idx,
 				     cudaStream_t stream) {
 
-	cudaError_t err;
-	query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
-	    b, n, m, radius, nsample, new_xyz, xyz, idx);
+    cudaError_t err;
+    query_ball_point_kernel<<<b, opt_n_threads(m), 0, stream>>>(
+	b, n, m, radius, nsample, new_xyz, xyz, idx);
 
-	err = cudaGetLastError();
-	if (cudaSuccess != err) {
-		fprintf(stderr, "CUDA kernel failed : %s\n",
-			cudaGetErrorString(err));
-		exit(-1);
-	}
+    err = cudaGetLastError();
+    if (cudaSuccess != err) {
+	fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+	exit(-1);
+    }
 }
diff --git a/utils/csrc/group_points.c b/utils/csrc/group_points.c
index 61f4643..95a1de4 100644
--- a/utils/csrc/group_points.c
+++ b/utils/csrc/group_points.c
@@ -9,15 +9,15 @@ int group_points_wrapper(int b, int n, int c, int npoints, int nsample,
 			 THCudaIntTensor *idx_tensor,
 			 THCudaTensor *out_tensor) {
 
-	const float *points = THCudaTensor_data(state, points_tensor);
-	const int *idx = THCudaIntTensor_data(state, idx_tensor);
-	float *out = THCudaTensor_data(state, out_tensor);
+    const float *points = THCudaTensor_data(state, points_tensor);
+    const int *idx = THCudaIntTensor_data(state, idx_tensor);
+    float *out = THCudaTensor_data(state, out_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	group_points_kernel_wrapper(b, n, c, npoints, nsample, points, idx, out,
-				    stream);
-	return 1;
+    group_points_kernel_wrapper(b, n, c, npoints, nsample, points, idx, out,
+				stream);
+    return 1;
 }
 
 int group_points_grad_wrapper(int b, int n, int c, int npoints, int nsample,
@@ -25,13 +25,13 @@ int group_points_grad_wrapper(int b, int n, int c, int npoints, int nsample,
 			      THCudaIntTensor *idx_tensor,
 			      THCudaTensor *grad_points_tensor) {
 
-	float *grad_points = THCudaTensor_data(state, grad_points_tensor);
-	const int *idx = THCudaIntTensor_data(state, idx_tensor);
-	const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
+    float *grad_points = THCudaTensor_data(state, grad_points_tensor);
+    const int *idx = THCudaIntTensor_data(state, idx_tensor);
+    const float *grad_out = THCudaTensor_data(state, grad_out_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	group_points_grad_kernel_wrapper(b, n, c, npoints, nsample, grad_out,
-					 idx, grad_points, stream);
-	return 1;
+    group_points_grad_kernel_wrapper(b, n, c, npoints, nsample, grad_out, idx,
+				     grad_points, stream);
+    return 1;
 }
diff --git a/utils/csrc/group_points_gpu.cu b/utils/csrc/group_points_gpu.cu
index 26852ea..bbbab2b 100644
--- a/utils/csrc/group_points_gpu.cu
+++ b/utils/csrc/group_points_gpu.cu
@@ -1,86 +1,83 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include "group_points_gpu.h"
 #include "cuda_utils.h"
+#include "group_points_gpu.h"
 
 // input: points(b, n, c) idx(b, npoints, nsample)
 // output: out(b, npoints, nsample, c)
 __global__ void group_points_kernel(int b, int n, int c, int npoints,
 				    int nsample,
-				    const float  *__restrict__ points,
-				    const int  *__restrict__ idx,
-				    float  *__restrict__ out) {
-	int batch_index = blockIdx.x;
-	points += batch_index * n * c;
-	idx += batch_index * npoints * nsample;
-	out += batch_index * npoints * nsample * c;
+				    const float *__restrict__ points,
+				    const int *__restrict__ idx,
+				    float *__restrict__ out) {
+    int batch_index = blockIdx.x;
+    points += batch_index * n * c;
+    idx += batch_index * npoints * nsample;
+    out += batch_index * npoints * nsample * c;
 
-	int index = threadIdx.x;
-	int stride = blockDim.x;
-	for (int j = index; j < npoints; j += stride) {
-		for (int k = 0; k < nsample; ++k) {
-			int ii = idx[j * nsample + k];
-			memcpy(out + j * nsample * c + k * c, points + ii * c,
-			       sizeof(float) * c);
-		}
+    int index = threadIdx.x;
+    int stride = blockDim.x;
+    for (int j = index; j < npoints; j += stride) {
+	for (int k = 0; k < nsample; ++k) {
+	    int ii = idx[j * nsample + k];
+	    memcpy(out + j * nsample * c + k * c, points + ii * c,
+		   sizeof(float) * c);
 	}
+    }
 }
 
 void group_points_kernel_wrapper(int b, int n, int c, int npoints, int nsample,
 				 const float *points, const int *idx,
 				 float *out, cudaStream_t stream) {
 
-	cudaError_t err;
-	group_points_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
-	    b, n, c, npoints, nsample, points, idx, out);
+    cudaError_t err;
+    group_points_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
+	b, n, c, npoints, nsample, points, idx, out);
 
-	err = cudaGetLastError();
-	if (cudaSuccess != err) {
-		fprintf(stderr, "CUDA kernel failed : %s\n",
-			cudaGetErrorString(err));
-		exit(-1);
-	}
+    err = cudaGetLastError();
+    if (cudaSuccess != err) {
+	fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+	exit(-1);
+    }
 }
 
 // input: grad_out(b, npoints, nsample, c), idx(b, npoints, nsample)
 // output: grad_points(b, n, c)
 __global__ void group_points_grad_kernel(int b, int n, int c, int npoints,
 					 int nsample,
-					 const float  *__restrict__ grad_out,
-					 const int  *__restrict__ idx,
-					 float  *__restrict__ grad_points) {
-	int batch_index = blockIdx.x;
-	grad_points += batch_index * n * c;
-	idx += batch_index * npoints * nsample;
-	grad_out += batch_index * npoints * nsample * c;
+					 const float *__restrict__ grad_out,
+					 const int *__restrict__ idx,
+					 float *__restrict__ grad_points) {
+    int batch_index = blockIdx.x;
+    grad_points += batch_index * n * c;
+    idx += batch_index * npoints * nsample;
+    grad_out += batch_index * npoints * nsample * c;
 
-	int index = threadIdx.x;
-	int stride = blockDim.x;
-	for (int j = index; j < npoints; j += stride) {
-		for (int k = 0; k < nsample; ++k) {
-			int ii = idx[j * nsample + k];
-			for (int l = 0; l < c; ++l) {
-				atomicAdd(
-				    grad_points + ii * c + l,
-				    grad_out[j * nsample * c + k * c + l]);
-			}
-		}
+    int index = threadIdx.x;
+    int stride = blockDim.x;
+    for (int j = index; j < npoints; j += stride) {
+	for (int k = 0; k < nsample; ++k) {
+	    int ii = idx[j * nsample + k];
+	    for (int l = 0; l < c; ++l) {
+		atomicAdd(grad_points + ii * c + l,
+			  grad_out[j * nsample * c + k * c + l]);
+	    }
 	}
+    }
 }
 
 void group_points_grad_kernel_wrapper(int b, int n, int c, int npoints,
 				      int nsample, const float *grad_out,
 				      const int *idx, float *grad_points,
 				      cudaStream_t stream) {
-	cudaError_t err;
-	group_points_grad_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
-	    b, n, c, npoints, nsample, grad_out, idx, grad_points);
+    cudaError_t err;
+    group_points_grad_kernel<<<b, opt_n_threads(npoints), 0, stream>>>(
+	b, n, c, npoints, nsample, grad_out, idx, grad_points);
 
-	err = cudaGetLastError();
-	if (cudaSuccess != err) {
-		fprintf(stderr, "CUDA kernel failed : %s\n",
-			cudaGetErrorString(err));
-		exit(-1);
-	}
+    err = cudaGetLastError();
+    if (cudaSuccess != err) {
+	fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+	exit(-1);
+    }
 }
diff --git a/utils/csrc/interpolate_gpu.cu b/utils/csrc/interpolate_gpu.cu
index aaf313b..de2db5e 100644
--- a/utils/csrc/interpolate_gpu.cu
+++ b/utils/csrc/interpolate_gpu.cu
@@ -2,8 +2,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include "interpolate_gpu.h"
 #include "cuda_utils.h"
+#include "interpolate_gpu.h"
 
 // input: unknown(b, n, 3) known(b, m, 3)
 // output: dist2(b, n, 3), idx(b, n, 3)
diff --git a/utils/csrc/roi_mask_points_wrapper.c b/utils/csrc/roi_mask_points_wrapper.c
index 01310ca..7ce6ddf 100644
--- a/utils/csrc/roi_mask_points_wrapper.c
+++ b/utils/csrc/roi_mask_points_wrapper.c
@@ -9,17 +9,17 @@ int roi_mask_wrapper(int n_roi, int b, int n, THCudaTensor *rois_tensor,
 		     THCudaTensor *data_xyz_tensor,
 		     THCudaByteTensor *mask_tensor) {
 
-	const float *rois = THCudaTensor_data(state, rois_tensor);
-	const long *batch_indices =
-	    THCudaLongTensor_data(state, batch_indices_tensor);
-	const float *data_xyz = THCudaTensor_data(state, data_xyz_tensor);
-	unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
+    const float *rois = THCudaTensor_data(state, rois_tensor);
+    const long *batch_indices =
+	THCudaLongTensor_data(state, batch_indices_tensor);
+    const float *data_xyz = THCudaTensor_data(state, data_xyz_tensor);
+    unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	roi_mask_kernel_wrapper(n_roi, b, n, rois, batch_indices, data_xyz,
-				mask, stream);
-	return 1;
+    roi_mask_kernel_wrapper(n_roi, b, n, rois, batch_indices, data_xyz, mask,
+			    stream);
+    return 1;
 }
 
 int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d,
@@ -28,17 +28,17 @@ int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d,
 				 THCudaTensor *points_tensor,
 				 THCudaTensor *descriptors_tensor) {
 
-	const long *batch_indices =
-	    THCudaLongTensor_data(state, batch_indices_tensor);
-	const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
-	const float *points = THCudaTensor_data(state, points_tensor);
-	float *descriptors = THCudaTensor_data(state, descriptors_tensor);
+    const long *batch_indices =
+	THCudaLongTensor_data(state, batch_indices_tensor);
+    const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
+    const float *points = THCudaTensor_data(state, points_tensor);
+    float *descriptors = THCudaTensor_data(state, descriptors_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
-	roi_avg_pool_kernel_forward_wrapper(n_roi, b, n, d, mask, batch_indices,
-					    points, descriptors, stream);
+    cudaStream_t stream = THCState_getCurrentStream(state);
+    roi_avg_pool_kernel_forward_wrapper(n_roi, b, n, d, mask, batch_indices,
+					points, descriptors, stream);
 
-	return 1;
+    return 1;
 }
 
 int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d,
@@ -47,17 +47,16 @@ int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d,
 				  THCudaTensor *grad_descriptors_tensor,
 				  THCudaTensor *grad_points_tensor) {
 
-	const long *batch_indices =
-	    THCudaLongTensor_data(state, batch_indices_tensor);
-	const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
-	const float *grad_descriptors =
-	    THCudaTensor_data(state, grad_descriptors_tensor);
-	float *grad_points = THCudaTensor_data(state, grad_points_tensor);
+    const long *batch_indices =
+	THCudaLongTensor_data(state, batch_indices_tensor);
+    const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor);
+    const float *grad_descriptors =
+	THCudaTensor_data(state, grad_descriptors_tensor);
+    float *grad_points = THCudaTensor_data(state, grad_points_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
-	roi_avg_pool_kernel_backward_wrapper(n_roi, b, n, d, mask,
-					     batch_indices, grad_descriptors,
-					     grad_points, stream);
+    cudaStream_t stream = THCState_getCurrentStream(state);
+    roi_avg_pool_kernel_backward_wrapper(n_roi, b, n, d, mask, batch_indices,
+					 grad_descriptors, grad_points, stream);
 
-	return 1;
+    return 1;
 }
diff --git a/utils/csrc/sampling.c b/utils/csrc/sampling.c
index b717b2f..4db8d17 100644
--- a/utils/csrc/sampling.c
+++ b/utils/csrc/sampling.c
@@ -9,15 +9,14 @@ int gather_points_wrapper(int b, int n, int c, int npoints,
 			  THCudaIntTensor *idx_tensor,
 			  THCudaTensor *out_tensor) {
 
-	const float *points = THCudaTensor_data(state, points_tensor);
-	const int *idx = THCudaIntTensor_data(state, idx_tensor);
-	float *out = THCudaTensor_data(state, out_tensor);
+    const float *points = THCudaTensor_data(state, points_tensor);
+    const int *idx = THCudaIntTensor_data(state, idx_tensor);
+    float *out = THCudaTensor_data(state, out_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	gather_points_kernel_wrapper(b, n, c, npoints, points, idx, out,
-				     stream);
-	return 1;
+    gather_points_kernel_wrapper(b, n, c, npoints, points, idx, out, stream);
+    return 1;
 }
 
 int furthest_point_sampling_wrapper(int b, int n, int m,
@@ -25,13 +24,12 @@ int furthest_point_sampling_wrapper(int b, int n, int m,
 				    THCudaTensor *temp_tensor,
 				    THCudaIntTensor *idx_tensor) {
 
-	const float *points = THCudaTensor_data(state, points_tensor);
-	float *temp = THCudaTensor_data(state, temp_tensor);
-	int *idx = THCudaIntTensor_data(state, idx_tensor);
+    const float *points = THCudaTensor_data(state, points_tensor);
+    float *temp = THCudaTensor_data(state, temp_tensor);
+    int *idx = THCudaIntTensor_data(state, idx_tensor);
 
-	cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = THCState_getCurrentStream(state);
 
-	furthest_point_sampling_kernel_wrapper(b, n, m, points, temp, idx,
-					       stream);
-	return 1;
+    furthest_point_sampling_kernel_wrapper(b, n, m, points, temp, idx, stream);
+    return 1;
 }
diff --git a/utils/data_utils.py b/utils/data_utils.py
index 168025e..71c368d 100644
--- a/utils/data_utils.py
+++ b/utils/data_utils.py
@@ -3,17 +3,19 @@ import numpy as np
 
 
 class PointcloudScale(object):
+
     def __init__(self, mean=2.0, std=1.0, clip=1.8):
         self.mean, self.std, self.clip = mean, std, clip
 
     def __call__(self, points):
         scaler = points.new(1).normal_(
-            mean=self.mean, std=self.std).clamp_(
-                max(self.mean - self.clip, 0.01), self.mean + self.clip)
+            mean=self.mean, std=self.std
+        ).clamp_(max(self.mean - self.clip, 0.01), self.mean + self.clip)
         return scaler * points
 
 
 class PointcloudRotate(object):
+
     def __init__(self, x_axis=False, z_axis=True):
         assert x_axis or z_axis
         self.x, self.z = x_axis, z_axis
@@ -46,25 +48,30 @@ class PointcloudRotate(object):
 
 
 class PointcloudJitter(object):
+
     def __init__(self, std=0.01, clip=0.03):
         self.std, self.clip = std, clip
 
     def __call__(self, points):
         jittered_data = points.new(*points.size()).normal_(
-            mean=0.0, std=self.std).clamp_(-self.clip, self.clip)
+            mean=0.0, std=self.std
+        ).clamp_(-self.clip, self.clip)
         return points + jittered_data
 
 
 class PointcloudTranslate(object):
+
     def __init__(self, std=1.0, clip=3.0):
         self.std, self.clip = std, clip
 
     def __call__(self, points):
         translation = points.new(3).normal_(
-            mean=0.0, std=self.std).clamp_(-self.clip, self.clip)
+            mean=0.0, std=self.std
+        ).clamp_(-self.clip, self.clip)
         return points + translation
 
 
 class PointcloudToTensor(object):
+
     def __call__(self, points):
         return torch.from_numpy(points).float()
diff --git a/utils/linalg_utils.py b/utils/linalg_utils.py
index 8a77b99..203518f 100644
--- a/utils/linalg_utils.py
+++ b/utils/linalg_utils.py
@@ -4,9 +4,11 @@ from enum import Enum
 PDist2Order = Enum('PDist2Order', 'd_first d_second')
 
 
-def pdist2(X: torch.Tensor,
-           Z: torch.Tensor = None,
-           order: PDist2Order = PDist2Order.d_second) -> torch.Tensor:
+def pdist2(
+        X: torch.Tensor,
+        Z: torch.Tensor = None,
+        order: PDist2Order = PDist2Order.d_second
+) -> torch.Tensor:
     r""" Calculates the pairwise distance between X and Z
 
     D[b, i, j] = l2 distance X[b, i] and Z[b, j]
diff --git a/utils/pointnet2_modules.py b/utils/pointnet2_modules.py
index 7900f0b..68ec4ac 100644
--- a/utils/pointnet2_modules.py
+++ b/utils/pointnet2_modules.py
@@ -24,13 +24,15 @@ class PointnetSAModuleMSG(nn.Module):
         Use batchnorm
     """
 
-    def __init__(self,
-                 *,
-                 npoint: int,
-                 radii: List[float],
-                 nsamples: List[int],
-                 mlps: List[List[int]],
-                 bn: bool = True):
+    def __init__(
+            self,
+            *,
+            npoint: int,
+            radii: List[float],
+            nsamples: List[int],
+            mlps: List[List[int]],
+            bn: bool = True
+    ):
         super().__init__()
 
         assert len(radii) == len(nsamples) == len(mlps)
@@ -41,8 +43,7 @@ class PointnetSAModuleMSG(nn.Module):
         for i in range(len(radii)):
             radius = radii[i]
             nsample = nsamples[i]
-            self.groupers.append(
-                pointnet2_utils.QueryAndGroup(radius, nsample))
+            self.groupers.append(pointnet2_utils.QueryAndGroup(radius, nsample))
             mlp_spec = mlps[i]
             self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn))
 
@@ -66,18 +67,20 @@ class PointnetSAModuleMSG(nn.Module):
 
         new_points_list = []
         new_xyz = pointnet2_utils.gather_points(
-            xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint))
+            xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint)
+        )
         for i in range(len(self.groupers)):
             new_points = self.groupers[i](xyz, new_xyz, points)
 
-            new_points = self.mlps[i](new_points.permute(
-                0, 3, 1, 2))  # (B, mlp[-1], npoint, nsample)
+            new_points = self.mlps[i](new_points.permute(0, 3, 1, 2)
+                                     )  # (B, mlp[-1], npoint, nsample)
             new_points = F.max_pool2d(
-                new_points,
-                kernel_size=[1, new_points.size(3)])  # (B, mlp[-1], npoint, 1)
+                new_points, kernel_size=[1, new_points.size(3)]
+            )  # (B, mlp[-1], npoint, 1)
             new_points = new_points.squeeze(-1)  # (B, mlp[-1], npoint)
             new_points = new_points.transpose(
-                1, 2).contiguous()  # (B, npoint, mlp[-1])
+                1, 2
+            ).contiguous()  # (B, npoint, mlp[-1])
 
             new_points_list.append(new_points)
 
@@ -101,13 +104,15 @@ class PointnetSAModule(nn.Module):
         Use batchnorm
     """
 
-    def __init__(self,
-                 *,
-                 mlp: List[int],
-                 npoint: int = None,
-                 radius: float = None,
-                 nsample: int = None,
-                 bn: bool = True):
+    def __init__(
+            self,
+            *,
+            mlp: List[int],
+            npoint: int = None,
+            radius: float = None,
+            nsample: int = None,
+            bn: bool = True
+    ):
         super().__init__()
         self.npoint = npoint
 
@@ -140,21 +145,23 @@ class PointnetSAModule(nn.Module):
 
         if self.npoint is not None:
             new_xyz = pointnet2_utils.gather_points(
-                xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint))
+                xyz, pointnet2_utils.furthest_point_sample(xyz, self.npoint)
+            )
         else:
             new_xyz = xyz.data.new([[[0, 0, 0]]]).expand(xyz.size(0), 1, 3)
 
-        new_points = self.grouper(xyz, new_xyz,
-                                  points)  # (B, npoint, nsample, 3 + C)
+        new_points = self.grouper(
+            xyz, new_xyz, points
+        )  # (B, npoint, nsample, 3 + C)
 
-        new_points = self.mlp(new_points.permute(
-            0, 3, 1, 2))  # (B, mlp[-1], npoint, nsample)
+        new_points = self.mlp(new_points.permute(0, 3, 1, 2)
+                             )  # (B, mlp[-1], npoint, nsample)
         new_points = F.max_pool2d(
-            new_points,
-            kernel_size=[1, new_points.size(3)])  # (B, mlp[-1], npoint, 1)
+            new_points, kernel_size=[1, new_points.size(3)]
+        )  # (B, mlp[-1], npoint, 1)
         new_points = new_points.squeeze(-1)  # (B, mlp[-1], npoint)
-        new_points = new_points.transpose(
-            1, 2).contiguous()  # (B, npoint, mlp[-1])
+        new_points = new_points.transpose(1, 2
+                                         ).contiguous()  # (B, npoint, mlp[-1])
 
         return new_xyz, new_points
 
@@ -174,9 +181,10 @@ class PointnetFPModule(nn.Module):
         super().__init__()
         self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
 
-    def forward(self, unknown: torch.Tensor, known: torch.Tensor,
-                unknow_feats: torch.Tensor,
-                known_feats: torch.Tensor) -> torch.Tensor:
+    def forward(
+            self, unknown: torch.Tensor, known: torch.Tensor,
+            unknow_feats: torch.Tensor, known_feats: torch.Tensor
+    ) -> torch.Tensor:
         r"""
         Parameters
         ----------
@@ -201,19 +209,21 @@ class PointnetFPModule(nn.Module):
         weight = dist_recip / norm
 
         interpolated_feats = pointnet2_utils.three_interpolate(
-            known_feats, idx, weight)
+            known_feats, idx, weight
+        )
         if unknow_feats is not None:
-            new_points = torch.cat(
-                [interpolated_feats, unknow_feats], dim=-1)  #(B, n, C2 + C1)
+            new_points = torch.cat([interpolated_feats, unknow_feats],
+                                   dim=-1)  #(B, n, C2 + C1)
         else:
             new_points = interpolated_feats
 
-        new_points = new_points.unsqueeze(-1).transpose(1,
-                                                        2)  #(B, C2 + C1, n, 1)
+        new_points = new_points.unsqueeze(-1).transpose(
+            1, 2
+        )  #(B, C2 + C1, n, 1)
         new_points = self.mlp(new_points)
 
-        return new_points.squeeze(-1).transpose(
-            1, 2).contiguous()  #(B, n, mlp[-1])
+        return new_points.squeeze(-1).transpose(1, 2
+                                               ).contiguous()  #(B, n, mlp[-1])
 
 
 if __name__ == "__main__":
@@ -224,7 +234,8 @@ if __name__ == "__main__":
     xyz_feats = Variable(torch.randn(2, 10, 6).cuda(), requires_grad=True)
 
     test_module = PointnetSAModuleMSG(
-        npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]])
+        npoint=2, radii=[5.0, 10.0], nsamples=[6, 3], mlps=[[9, 3], [9, 6]]
+    )
     test_module.cuda()
     print(test_module(xyz, xyz_feats))
 
@@ -237,7 +248,6 @@ if __name__ == "__main__":
 
     for _ in range(1):
         _, new_points = test_module(xyz, xyz_feats)
-        new_points.backward(
-            torch.cuda.FloatTensor(*new_points.size()).fill_(1))
+        new_points.backward(torch.cuda.FloatTensor(*new_points.size()).fill_(1))
         print(new_points)
         print(xyz.grad)
diff --git a/utils/pointnet2_utils.py b/utils/pointnet2_utils.py
index 28125e5..b008822 100644
--- a/utils/pointnet2_utils.py
+++ b/utils/pointnet2_utils.py
@@ -5,12 +5,14 @@ import torch.nn.functional as F
 import torch.nn as nn
 from linalg_utils import pdist2, PDist2Order
 from collections import namedtuple
-import _ext as pointnet2
 import pytorch_utils as pt_utils
 from typing import List, Tuple
 
+from _ext import pointnet2
+
 
 class RandomDropout(nn.Module):
+
     def __init__(self, p=0.5, inplace=False):
         super().__init__()
         self.p = p
@@ -18,11 +20,13 @@ class RandomDropout(nn.Module):
 
     def forward(self, X):
         theta = torch.Tensor(1).uniform_(0, self.p)[0]
-        return pt_utils.feature_dropout_no_scaling(X, theta, self.train,
-                                                   self.inplace)
+        return pt_utils.feature_dropout_no_scaling(
+            X, theta, self.train, self.inplace
+        )
 
 
 class FurthestPointSampling(Function):
+
     @staticmethod
     def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
         r"""
@@ -30,16 +34,16 @@ class FurthestPointSampling(Function):
         minimum distance
 
         Parameters
-        ---------
+        ----------
         xyz : torch.Tensor
             (B, N, 3) tensor where N > npoint
         npoint : int32
             number of points in the sampled set
 
         Returns
+        -------
         torch.Tensor
             (B, npoint) tensor containing the set
-        ------
         """
         B, N, _ = xyz.size()
 
@@ -50,8 +54,9 @@ class FurthestPointSampling(Function):
         temp = temp.contiguous()
         output = output.contiguous()
 
-        pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp,
-                                                  output)
+        pointnet2.furthest_point_sampling_wrapper(
+            B, N, npoint, xyz, temp, output
+        )
 
         return output
 
@@ -64,6 +69,7 @@ furthest_point_sample = FurthestPointSampling.apply
 
 
 class GatherPoints(Function):
+
     @staticmethod
     def forward(ctx, points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
         r"""
@@ -71,7 +77,7 @@ class GatherPoints(Function):
         minimum distance
 
         Parameters
-        ---------
+        ----------
         points : torch.Tensor
             (B, N, 3) tensor
 
@@ -79,9 +85,9 @@ class GatherPoints(Function):
             (B, npoint) tensor of the points to gather
 
         Returns
+        -------
         torch.Tensor
             (B, npoint, 3) tensor
-        ------
         """
 
         B, N, C = points.size()
@@ -106,6 +112,7 @@ gather_points = GatherPoints.apply
 
 
 class ThreeNN(Function):
+
     @staticmethod
     def forward(ctx, unknown: torch.Tensor,
                 known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
@@ -147,9 +154,11 @@ three_nn = ThreeNN.apply
 
 
 class ThreeInterpolate(Function):
+
     @staticmethod
-    def forward(ctx, points: torch.Tensor, idx: torch.Tensor,
-                weight: torch.Tensor) -> torch.Tensor:
+    def forward(
+            ctx, points: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor
+    ) -> torch.Tensor:
         r"""
             Performs weight linear interpolation on 3 points
         Parameters
@@ -178,14 +187,15 @@ class ThreeInterpolate(Function):
         idx = idx.contiguous()
         weight = weight.contiguous()
         output = output.contiguous()
-        pointnet2.three_interpolate_wrapper(B, m, c, n, points, idx, weight,
-                                            output)
+        pointnet2.three_interpolate_wrapper(
+            B, m, c, n, points, idx, weight, output
+        )
 
         return output
 
     @staticmethod
     def backward(ctx, grad_out: torch.Tensor
-                 ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         r"""
         Parameters
         ----------
@@ -196,6 +206,7 @@ class ThreeInterpolate(Function):
         -------
         grad_points : torch.Tensor
             (B, m, c) tensor with gradients of points
+
         None
 
         None
@@ -209,8 +220,9 @@ class ThreeInterpolate(Function):
         idx = idx.contiguous()
         weight = weight.contiguous()
         grad_points = grad_points.contiguous()
-        pointnet2.three_interpolate_grad_wrapper(B, n, c, m, grad_out.data,
-                                                 idx, weight, grad_points.data)
+        pointnet2.three_interpolate_grad_wrapper(
+            B, n, c, m, grad_out.data, idx, weight, grad_points.data
+        )
 
         return grad_points, None, None
 
@@ -219,6 +231,7 @@ three_interpolate = ThreeInterpolate.apply
 
 
 class GroupPoints(Function):
+
     @staticmethod
     def forward(ctx, points: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
         r"""
@@ -243,8 +256,9 @@ class GroupPoints(Function):
         points = points.contiguous()
         idx = idx.contiguous()
         output = output.contiguous()
-        pointnet2.group_points_wrapper(B, N, C, npoints, nsample, points, idx,
-                                       output)
+        pointnet2.group_points_wrapper(
+            B, N, C, npoints, nsample, points, idx, output
+        )
 
         ctx.idx_N_C_for_backward = (idx, N, C)
         return output
@@ -273,7 +287,8 @@ class GroupPoints(Function):
         grad_out = grad_out.contiguous()
         grad_points = grad_points.contiguous()
         pointnet2.group_points_grad_wrapper(
-            B, N, C, npoint, nsample, grad_out.data, idx, grad_points.data)
+            B, N, C, npoint, nsample, grad_out.data, idx, grad_points.data
+        )
 
         return grad_points, None
 
@@ -282,13 +297,16 @@ group_points = GroupPoints.apply
 
 
 class BallQuery(Function):
+
     @staticmethod
-    def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor,
-                new_xyz: torch.Tensor) -> torch.Tensor:
+    def forward(
+            ctx, radius: float, nsample: int, xyz: torch.Tensor,
+            new_xyz: torch.Tensor
+    ) -> torch.Tensor:
         r"""
 
         Parameters
-        ---------
+        ----------
         radius : float
             radius of the balls
         nsample : int
@@ -299,7 +317,7 @@ class BallQuery(Function):
             (B, npoint, 3) centers of the ball query
 
         Returns
-        ------
+        -------
         torch.Tensor
             (B, npoint, nsample) tensor with the indicies of the points that form the query balls
         """
@@ -311,8 +329,9 @@ class BallQuery(Function):
         new_xyz = new_xyz.contiguous()
         xyz = xyz.contiguous()
         idx = idx.contiguous()
-        pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz,
-                                     xyz, idx)
+        pointnet2.ball_query_wrapper(
+            B, N, npoint, radius, nsample, new_xyz, xyz, idx
+        )
 
         return idx
 
@@ -344,10 +363,11 @@ class QueryAndGroup(nn.Module):
             self,
             xyz: torch.Tensor,
             new_xyz: torch.Tensor,
-            points: torch.Tensor = None) -> Tuple[torch.Tensor]:
+            points: torch.Tensor = None
+    ) -> Tuple[torch.Tensor]:
         r"""
         Parameters
-        ---------
+        ----------
         xyz : torch.Tensor
             xyz coordinates of the points (B, N, 3)
         new_xyz : torch.Tensor
@@ -368,9 +388,8 @@ class QueryAndGroup(nn.Module):
         if points is not None:
             grouped_points = group_points(points, idx)
             if self.use_xyz:
-                new_points = torch.cat(
-                    [grouped_xyz, grouped_points],
-                    dim=-1)  # (B, npoint, nsample, 3 + C)
+                new_points = torch.cat([grouped_xyz, grouped_points],
+                                       dim=-1)  # (B, npoint, nsample, 3 + C)
             else:
                 new_points = group_points
         else:
@@ -395,10 +414,11 @@ class GroupAll(nn.Module):
             self,
             xyz: torch.Tensor,
             new_xyz: torch.Tensor,
-            points: torch.Tensor = None) -> Tuple[torch.Tensor]:
+            points: torch.Tensor = None
+    ) -> Tuple[torch.Tensor]:
         r"""
         Parameters
-        ---------
+        ----------
         xyz : torch.Tensor
             xyz coordinates of the points (B, N, 3)
         new_xyz : torch.Tensor
@@ -414,11 +434,12 @@ class GroupAll(nn.Module):
 
         grouped_xyz = xyz.view(xyz.size(0), 1, xyz.size(1), xyz.size(2))
         if points is not None:
-            grouped_points = points.view(points.size(0), 1, points.size(1), points.size(2))
+            grouped_points = points.view(
+                points.size(0), 1, points.size(1), points.size(2)
+            )
             if self.use_xyz:
-                new_points = torch.cat(
-                    [grouped_xyz, grouped_points],
-                    dim=-1)  # (B, npoint, nsample, 3 + C)
+                new_points = torch.cat([grouped_xyz, grouped_points],
+                                       dim=-1)  # (B, npoint, nsample, 3 + C)
             else:
                 new_points = group_points
         else:
diff --git a/utils/pytorch_utils.py b/utils/pytorch_utils.py
index 26d317d..5f40710 100644
--- a/utils/pytorch_utils.py
+++ b/utils/pytorch_utils.py
@@ -16,48 +16,55 @@ import math
 
 
 class SharedMLP(nn.Sequential):
-    def __init__(self,
-                 args: List[int],
-                 *,
-                 bn: bool = False,
-                 activation=nn.ReLU(inplace=True),
-                 name: str = ""):
+
+    def __init__(
+            self,
+            args: List[int],
+            *,
+            bn: bool = False,
+            activation=nn.ReLU(inplace=True),
+            name: str = ""
+    ):
         super().__init__()
 
         for i in range(len(args) - 1):
-            self.add_module(name + 'layer{}'.format(i),
-                            Conv2d(
-                                args[i],
-                                args[i + 1],
-                                bn=bn,
-                                activation=activation))
+            self.add_module(
+                name + 'layer{}'.format(i),
+                Conv2d(args[i], args[i + 1], bn=bn, activation=activation)
+            )
 
 
 class _ConvBase(nn.Sequential):
-    def __init__(self,
-                 in_size,
-                 out_size,
-                 kernel_size,
-                 stride,
-                 padding,
-                 activation,
-                 bn,
-                 init,
-                 conv=None,
-                 batch_norm=None,
-                 bias=True,
-                 name=""):
+
+    def __init__(
+            self,
+            in_size,
+            out_size,
+            kernel_size,
+            stride,
+            padding,
+            activation,
+            bn,
+            init,
+            conv=None,
+            batch_norm=None,
+            bias=True,
+            name=""
+    ):
         super().__init__()
 
         bias = bias and (not bn)
-        self.add_module(name + 'conv',
-                        conv(
-                            in_size,
-                            out_size,
-                            kernel_size=kernel_size,
-                            stride=stride,
-                            padding=padding,
-                            bias=bias))
+        self.add_module(
+            name + 'conv',
+            conv(
+                in_size,
+                out_size,
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=padding,
+                bias=bias
+            )
+        )
         init(self[0].weight)
 
         if bias:
@@ -73,18 +80,21 @@ class _ConvBase(nn.Sequential):
 
 
 class Conv1d(_ConvBase):
-    def __init__(self,
-                 in_size: int,
-                 out_size: int,
-                 *,
-                 kernel_size: int = 1,
-                 stride: int = 1,
-                 padding: int = 0,
-                 activation=nn.ReLU(inplace=True),
-                 bn: bool = False,
-                 init=nn.init.kaiming_normal,
-                 bias: bool = True,
-                 name: str = ""):
+
+    def __init__(
+            self,
+            in_size: int,
+            out_size: int,
+            *,
+            kernel_size: int = 1,
+            stride: int = 1,
+            padding: int = 0,
+            activation=nn.ReLU(inplace=True),
+            bn: bool = False,
+            init=nn.init.kaiming_normal,
+            bias: bool = True,
+            name: str = ""
+    ):
         super().__init__(
             in_size,
             out_size,
@@ -97,22 +107,26 @@ class Conv1d(_ConvBase):
             conv=nn.Conv1d,
             batch_norm=nn.BatchNorm1d,
             bias=bias,
-            name=name)
+            name=name
+        )
 
 
 class Conv2d(_ConvBase):
-    def __init__(self,
-                 in_size: int,
-                 out_size: int,
-                 *,
-                 kernel_size: Tuple[int, int] = (1, 1),
-                 stride: Tuple[int, int] = (1, 1),
-                 padding: Tuple[int, int] = (0, 0),
-                 activation=nn.ReLU(inplace=True),
-                 bn: bool = False,
-                 init=nn.init.kaiming_normal,
-                 bias: bool = True,
-                 name: str = ""):
+
+    def __init__(
+            self,
+            in_size: int,
+            out_size: int,
+            *,
+            kernel_size: Tuple[int, int] = (1, 1),
+            stride: Tuple[int, int] = (1, 1),
+            padding: Tuple[int, int] = (0, 0),
+            activation=nn.ReLU(inplace=True),
+            bn: bool = False,
+            init=nn.init.kaiming_normal,
+            bias: bool = True,
+            name: str = ""
+    ):
         super().__init__(
             in_size,
             out_size,
@@ -125,22 +139,26 @@ class Conv2d(_ConvBase):
             conv=nn.Conv2d,
             batch_norm=nn.BatchNorm2d,
             bias=bias,
-            name=name)
+            name=name
+        )
 
 
 class Conv3d(_ConvBase):
-    def __init__(self,
-                 in_size: int,
-                 out_size: int,
-                 *,
-                 kernel_size: Tuple[int, int, int] = (1, 1, 1),
-                 stride: Tuple[int, int, int] = (1, 1, 1),
-                 padding: Tuple[int, int, int] = (0, 0, 0),
-                 activation=nn.ReLU(inplace=True),
-                 bn: bool = False,
-                 init=nn.init.kaiming_normal,
-                 bias: bool = True,
-                 name: str = ""):
+
+    def __init__(
+            self,
+            in_size: int,
+            out_size: int,
+            *,
+            kernel_size: Tuple[int, int, int] = (1, 1, 1),
+            stride: Tuple[int, int, int] = (1, 1, 1),
+            padding: Tuple[int, int, int] = (0, 0, 0),
+            activation=nn.ReLU(inplace=True),
+            bn: bool = False,
+            init=nn.init.kaiming_normal,
+            bias: bool = True,
+            name: str = ""
+    ):
         super().__init__(
             in_size,
             out_size,
@@ -153,18 +171,22 @@ class Conv3d(_ConvBase):
             conv=nn.Conv3d,
             batch_norm=nn.BatchNorm3d,
             bias=bias,
-            name=name)
+            name=name
+        )
 
 
 class FC(nn.Sequential):
-    def __init__(self,
-                 in_size: int,
-                 out_size: int,
-                 *,
-                 activation=nn.ReLU(inplace=True),
-                 bn: bool = False,
-                 init=None,
-                 name: str = ""):
+
+    def __init__(
+            self,
+            in_size: int,
+            out_size: int,
+            *,
+            activation=nn.ReLU(inplace=True),
+            bn: bool = False,
+            init=None,
+            name: str = ""
+    ):
         super().__init__()
         self.add_module(name + 'fc', nn.Linear(in_size, out_size, bias=not bn))
         if init is not None:
@@ -183,6 +205,7 @@ class FC(nn.Sequential):
 
 
 class _DropoutNoScaling(InplaceFunction):
+
     @staticmethod
     def _make_noise(input):
         return input.new().resize_as_(input)
@@ -192,8 +215,9 @@ class _DropoutNoScaling(InplaceFunction):
         if inplace:
             return None
         n = g.appendNode(
-            g.create("Dropout", [input]).f_("ratio", p).i_(
-                "is_test", not train))
+            g.create("Dropout", [input]).f_("ratio",
+                                            p).i_("is_test", not train)
+        )
         real = g.appendNode(g.createSelect(n, 0))
         g.appendNode(g.createSelect(n, 1))
         return real
@@ -201,8 +225,10 @@ class _DropoutNoScaling(InplaceFunction):
     @classmethod
     def forward(cls, ctx, input, p=0.5, train=False, inplace=False):
         if p < 0 or p > 1:
-            raise ValueError("dropout probability has to be between 0 and 1, "
-                             "but got {}".format(p))
+            raise ValueError(
+                "dropout probability has to be between 0 and 1, "
+                "but got {}".format(p)
+            )
         ctx.p = p
         ctx.train = train
         ctx.inplace = inplace
@@ -236,6 +262,7 @@ dropout_no_scaling = _DropoutNoScaling.apply
 
 
 class _FeatureDropoutNoScaling(_DropoutNoScaling):
+
     @staticmethod
     def symbolic(input, p=0.5, train=False, inplace=False):
         return None
@@ -244,7 +271,8 @@ class _FeatureDropoutNoScaling(_DropoutNoScaling):
     def _make_noise(input):
         return input.new().resize_(
             input.size(0), input.size(1), *repeat(1,
-                                                  input.dim() - 2))
+                                                  input.dim() - 2)
+        )
 
 
 feature_dropout_no_scaling = _FeatureDropoutNoScaling.apply
@@ -252,21 +280,17 @@ feature_dropout_no_scaling = _FeatureDropoutNoScaling.apply
 
 def checkpoint_state(model=None, optimizer=None, best_prec=None, epoch=None):
+    """Bundle training progress into a dict; missing parts become None."""
+    model_state = None if model is None else model.state_dict()
+    optim_state = None if optimizer is None else optimizer.state_dict()
     return {
-        'epoch':
-        epoch,
-        'best_prec':
-        best_prec,
-        'model_state':
-        model.state_dict() if model is not None else None,
-        'optimizer_state':
-        optimizer.state_dict() if optimizer is not None else None
+        'epoch': epoch, 'best_prec': best_prec,
+        'model_state': model_state, 'optimizer_state': optim_state
     }
 
 
-def save_checkpoint(state,
-                    is_best,
-                    filename='checkpoint',
-                    bestname='model_best'):
+def save_checkpoint(
+        state, is_best, filename='checkpoint', bestname='model_best'
+):
     filename = '{}.pth.tar'.format(filename)
     torch.save(state, filename)
     if is_best:
@@ -325,7 +349,8 @@ def variable_size_collate(pad_val=0, use_shared_memory=True):
 
             out = out.view(
                 len(batch), max_len,
-                *[batch[0].size(i) for i in range(1, batch[0].dim())])
+                *[batch[0].size(i) for i in range(1, batch[0].dim())]
+            )
             out.fill_(pad_val)
             for i in range(len(batch)):
                 out[i, 0:batch[i].size(0)] = batch[i]
@@ -342,8 +367,9 @@ def variable_size_collate(pad_val=0, use_shared_memory=True):
                 return wrapped([torch.from_numpy(b) for b in batch])
             if elem.shape == ():  # scalars
                 py_type = float if elem.dtype.name.startswith('float') else int
-                return _numpy_type_map[elem.dtype.name](list(
-                    map(py_type, batch)))
+                return _numpy_type_map[elem.dtype.name](
+                    list(map(py_type, batch))
+                )
         elif isinstance(batch[0], int):
             return torch.LongTensor(batch)
         elif isinstance(batch[0], float):
@@ -372,19 +398,19 @@ class TrainValSplitter():
             Whether or not shuffle which data goes to which split
     """
 
-    def __init__(self,
-                 *,
-                 numel: int,
-                 percent_train: float,
-                 shuffled: bool = False):
+    def __init__(
+            self, *, numel: int, percent_train: float, shuffled: bool = False
+    ):
         indicies = np.array([i for i in range(numel)])
         if shuffled:
             np.random.shuffle(indicies)
 
         self.train = torch.utils.data.sampler.SubsetRandomSampler(
-            indicies[0:int(percent_train * numel)])
+            indicies[0:int(percent_train * numel)]
+        )
         self.val = torch.utils.data.sampler.SubsetRandomSampler(
-            indicies[int(percent_train * numel):-1])
+            indicies[int(percent_train * numel):]  # through the last index
+        )
 
 
 class CrossValSplitter():
@@ -413,7 +439,8 @@ class CrossValSplitter():
 
         self.val = torch.utils.data.sampler.SubsetRandomSampler(self.folds[0])
         self.train = torch.utils.data.sampler.SubsetRandomSampler(
-            np.concatenate(self.folds[1:], axis=0))
+            np.concatenate(self.folds[1:], axis=0)
+        )
 
         self.metrics = {}
 
@@ -428,7 +455,8 @@ class CrossValSplitter():
         assert idx >= 0 and idx < len(self)
         self.val.inidicies = self.folds[idx]
         self.train.inidicies = np.concatenate(
-            self.folds[np.arange(len(self)) != idx], axis=0)
+            self.folds[np.arange(len(self)) != idx], axis=0
+        )
 
     def __next__(self):
         self.current_v_ind += 1
@@ -454,6 +482,7 @@ class CrossValSplitter():
 
 
 def set_bn_momentum_default(bn_momentum):
+
     def fn(m):
         if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
             m.momentum = bn_momentum
@@ -462,14 +491,17 @@ def set_bn_momentum_default(bn_momentum):
 
 
 class BNMomentumScheduler(object):
-    def __init__(self,
-                 model,
-                 bn_lambda,
-                 last_epoch=-1,
-                 setter=set_bn_momentum_default):
+
+    def __init__(
+            self, model, bn_lambda, last_epoch=-1,
+            setter=set_bn_momentum_default
+    ):
         if not isinstance(model, nn.Module):
-            raise RuntimeError("Class '{}' is not a PyTorch nn Module".format(
-                type(model).__name__))
+            raise RuntimeError(
+                "Class '{}' is not a PyTorch nn Module".format(
+                    type(model).__name__
+                )
+            )
 
         self.model = model
         self.setter = setter
@@ -511,18 +543,21 @@ class Trainer(object):
         Name of file to output tensorboard_logger to
     """
 
-    def __init__(self,
-                 model,
-                 model_fn,
-                 optimizer,
-                 checkpoint_name="ckpt",
-                 best_name="best",
-                 lr_scheduler=None,
-                 bnm_scheduler=None,
-                 eval_frequency=1,
-                 log_name=None):
+    def __init__(
+            self,
+            model,
+            model_fn,
+            optimizer,
+            checkpoint_name="ckpt",
+            best_name="best",
+            lr_scheduler=None,
+            bnm_scheduler=None,
+            eval_frequency=1,
+            log_name=None
+    ):
         self.model, self.model_fn, self.optimizer, self.lr_scheduler, self.bnm_scheduler = (
-            model, model_fn, optimizer, lr_scheduler, bnm_scheduler)
+            model, model_fn, optimizer, lr_scheduler, bnm_scheduler
+        )
 
         self.checkpoint_name, self.best_name = checkpoint_name, best_name
         self.eval_frequency = eval_frequency
@@ -536,7 +571,8 @@ class Trainer(object):
     @staticmethod
     def _print(mode, epoch, loss, eval_dict, count):
         to_print = "[{:d}] {}\tMean Loss: {:.4e}".format(
-            epoch, mode, loss / count)
+            epoch, mode, loss / count
+        )
         for k, v in natsorted(eval_dict.items(), key=itemgetter(0)):
             to_print += "\tMean {}: {:2.3f}%".format(k, stats.mean(v) * 1e2)
 
@@ -574,7 +610,8 @@ class Trainer(object):
                 for k, v in eval_res.items():
                     if v is not None:
                         tb_log.log_value(
-                            "Training {}".format(k), 1.0 - v, step=idx)
+                            "Training {}".format(k), 1.0 - v, step=idx
+                        )
 
             d_loader.dataset.randomize()
 
@@ -593,7 +630,8 @@ class Trainer(object):
             self.optimizer.zero_grad()
 
             _, loss, eval_res = self.model_fn(
-                self.model, data, eval=True, epoch=epoch)
+                self.model, data, eval=True, epoch=epoch
+            )
 
             total_loss += loss.data[0]
             count += 1
@@ -606,8 +644,7 @@ class Trainer(object):
                 tb_log.log_value("Eval loss", loss.data[0], step=idx)
                 for k, v in eval_res.items():
                     if v is not None:
-                        tb_log.log_value(
-                            "Eval {}".format(k), 1.0 - v, step=idx)
+                        tb_log.log_value("Eval {}".format(k), 1.0 - v, step=idx)
 
             d_loader.dataset.randomize()
 
@@ -615,12 +652,14 @@ class Trainer(object):
 
         return total_loss / count, eval_dict
 
-    def train(self,
-              start_epoch,
-              n_epochs,
-              train_loader,
-              test_loader=None,
-              best_loss=0.0):
+    def train(
+            self,
+            start_epoch,
+            n_epochs,
+            train_loader,
+            test_loader=None,
+            best_loss=0.0
+    ):
         r"""
            Call to begin training the model
 
@@ -649,10 +688,12 @@ class Trainer(object):
                 is_best = val_loss < best_loss
                 best_loss = min(best_loss, val_loss)
                 save_checkpoint(
-                    checkpoint_state(self.model, self.optimizer, val_loss,
-                                     epoch),
+                    checkpoint_state(
+                        self.model, self.optimizer, val_loss, epoch
+                    ),
                     is_best,
                     filename=self.checkpoint_name,
-                    bestname=self.best_name)
+                    bestname=self.best_name
+                )
 
         return best_loss