From 009c30e5e3b2d3fcc4975358bcf4fabdcdb116af Mon Sep 17 00:00:00 2001 From: erikwijmans Date: Thu, 11 Jan 2018 19:42:01 -0500 Subject: [PATCH] Updates --- utils/cinclude/roi_mask_points_gpu.h | 29 ----- utils/cinclude/roi_mask_wrapper.h | 15 --- utils/csrc/roi_mask_points_gpu.cu | 157 --------------------------- utils/csrc/roi_mask_points_wrapper.c | 62 ----------- 4 files changed, 263 deletions(-) delete mode 100644 utils/cinclude/roi_mask_points_gpu.h delete mode 100644 utils/cinclude/roi_mask_wrapper.h delete mode 100644 utils/csrc/roi_mask_points_gpu.cu delete mode 100644 utils/csrc/roi_mask_points_wrapper.c diff --git a/utils/cinclude/roi_mask_points_gpu.h b/utils/cinclude/roi_mask_points_gpu.h deleted file mode 100644 index ad4adce..0000000 --- a/utils/cinclude/roi_mask_points_gpu.h +++ /dev/null @@ -1,29 +0,0 @@ - -#ifndef _ROI_MASK_POINTS_GPU_H -#define _ROI_MASK_POINTS_GPU_H - -#ifdef __cplusplus -extern "C" { -#endif -void roi_mask_kernel_wrapper(int n_roi, int b, int n, const float *rois, - const long *batch_indices, const float *data_xyz, - unsigned char *mask, cudaStream_t stream); - -void roi_avg_pool_kernel_forward_wrapper(int n_roi, int b, int n, int d, - const unsigned char *mask, - const long *batch_indices, - const float *points, - float *descriptors, - cudaStream_t stream); - -void roi_avg_pool_kernel_backward_wrapper(int n_roi, int b, int n, int d, - const unsigned char *mask, - const long *batch_indices, - const float *grad_descriptors, - float *grad_points, - cudaStream_t stream); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/utils/cinclude/roi_mask_wrapper.h b/utils/cinclude/roi_mask_wrapper.h deleted file mode 100644 index 4563e21..0000000 --- a/utils/cinclude/roi_mask_wrapper.h +++ /dev/null @@ -1,15 +0,0 @@ - -int roi_mask_wrapper(int n_roi, int b, int n, THCudaTensor *rois_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *data_xyz_tensor, - THCudaByteTensor *mask_tensor); -int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d, - THCudaByteTensor *mask_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *points_tensor, - THCudaTensor *descriptors_tensor); -int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d, - THCudaByteTensor *mask_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *grad_descriptors_tensor, - THCudaTensor *grad_points_tensor); diff --git a/utils/csrc/roi_mask_points_gpu.cu b/utils/csrc/roi_mask_points_gpu.cu deleted file mode 100644 index 25d3f6a..0000000 --- a/utils/csrc/roi_mask_points_gpu.cu +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include - -#include "cuda_utils.h" -#include "roi_mask_points_gpu.h" - -// roi format: [w, d, h, theta, cx, cy, cz] -__device__ bool is_in_roi(const float *__restrict__ xyz, - const float *__restrict__ roi) { - const float w = roi[0], d = roi[1], h = roi[2], theta = roi[3], cx = roi[4], - cy = roi[5], cz = roi[6]; - const float x = xyz[0], y = xyz[1], z = xyz[2]; - - const float sinval = sin(theta); - const float cosval = cos(theta); - - const float bx_x = w * cosval; - const float bx_y = d * -sinval; - - const float by_x = w * sinval; - const float by_y = d * cosval; - - const float dx = fabs(x - cx), dy = fabs(y - cy), dz = fabs(z - cz); - - return dx <= fabs(bx_x + by_x) && dy <= fabs(bx_y + by_y) && dz <= h; -} - -// Input rois (n_roi, 7), batch_indices (n_roi), data_xyz (b, n, 3) -// Ouput mask (n_roi, n) -__global__ void roi_mask_kernel(int n_roi, int b, int n, - const float *__restrict__ rois, - const long *__restrict__ batch_indices, - const float *__restrict__ data_xyz, - unsigned char *__restrict__ mask) { - - const int block_idx = blockIdx.x; - const float *__restrict__ roi = rois + block_idx * 7; - mask += block_idx * n; - - const long batch_idx = batch_indices[block_idx]; - data_xyz += batch_idx * n * 3; - - const int thread_idx = threadIdx.x; - const int thread_stride = blockDim.x; - for (int j = thread_idx; j < n; j += thread_stride) { - const float *__restrict__ xyz = data_xyz + j * 3; - mask[j] = is_in_roi(xyz, roi) ? 1 : 0; - } -} - -void roi_mask_kernel_wrapper(int n_roi, int b, int n, const float *rois, - const long *batch_indices, const float *data_xyz, - unsigned char *mask, cudaStream_t stream) { - - cudaError_t err; - unsigned int n_threads = opt_n_threads(n); - - roi_mask_kernel<<>>( - n_roi, b, n, rois, batch_indices, data_xyz, mask); - - err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } -} - -// Input mask(n_roi, n) batch_indices (n_roi), points (b, n, d) -// Ouput count (n_roi,) descriptors (n_roi, d) -__global__ void roi_avg_pool_kernel_forward( - int n_roi, int b, int n, int d, const unsigned char *__restrict__ mask, - const long *__restrict__ batch_indices, const float *__restrict__ points, - float *__restrict__ descriptors) { - - const int block_idx = blockIdx.x; - mask += block_idx * n; - descriptors += block_idx * d; - - const long batch_idx = batch_indices[block_idx]; - points += batch_idx * n * d; - - const int thread_idx = threadIdx.x; - const int thread_stride = blockDim.x; - - for (int j = thread_idx; j < n; j += thread_stride) { - if (mask[j] == 1) { - for (int c = 0; c < d; ++c) { - atomicAdd(descriptors + c, points[j * d + c]); - } - } - } -} - -void roi_avg_pool_kernel_forward_wrapper(int n_roi, int b, int n, int d, - const unsigned char *mask, - const long *batch_indices, - const float *points, - float *descriptors, - cudaStream_t stream) { - - cudaError_t err; - unsigned int n_threads = opt_n_threads(n); - - roi_avg_pool_kernel_forward<<>>( - n_roi, b, n, d, mask, batch_indices, points, descriptors); - - err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } -} - -__global__ void -roi_avg_pool_kernel_backward(int n_roi, int b, int n, int d, - const unsigned char *__restrict__ mask, - const long *__restrict__ batch_indices, - const float *__restrict__ grad_descriptors, - float *__restrict__ grad_points) { - - const int block_idx = blockIdx.x; - mask += block_idx * n; - grad_descriptors += block_idx * d; - - const long batch_idx = batch_indices[block_idx]; - grad_points += batch_idx * n * d; - - const int thread_idx = threadIdx.x; - const int thread_stride = blockDim.x; - for (int j = thread_idx; j < n; j += thread_stride) { - if (mask[j] == 1) { - for (int c = 0; c < d; ++c) { - atomicAdd(grad_points + j * d + c, grad_descriptors[c]); - } - } - } -} - -void roi_avg_pool_kernel_backward_wrapper(int n_roi, int b, int n, int d, - const unsigned char *mask, - const long *batch_indices, - const float *grad_descriptors, - float *grad_points, - cudaStream_t stream) { - - cudaError_t err; - unsigned int n_threads = opt_n_threads(n); - - roi_avg_pool_kernel_backward<<>>( - n_roi, b, n, d, mask, batch_indices, grad_descriptors, grad_points); - - err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); - exit(-1); - } -} diff --git a/utils/csrc/roi_mask_points_wrapper.c b/utils/csrc/roi_mask_points_wrapper.c deleted file mode 100644 index 7ce6ddf..0000000 --- a/utils/csrc/roi_mask_points_wrapper.c +++ /dev/null @@ -1,62 +0,0 @@ -#include - -#include "roi_mask_points_gpu.h" - -extern THCState *state; - -int roi_mask_wrapper(int n_roi, int b, int n, THCudaTensor *rois_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *data_xyz_tensor, - THCudaByteTensor *mask_tensor) { - - const float *rois = THCudaTensor_data(state, rois_tensor); - const long *batch_indices = - THCudaLongTensor_data(state, batch_indices_tensor); - const float *data_xyz = THCudaTensor_data(state, data_xyz_tensor); - unsigned char *mask = THCudaByteTensor_data(state, mask_tensor); - - cudaStream_t stream = THCState_getCurrentStream(state); - - roi_mask_kernel_wrapper(n_roi, b, n, rois, batch_indices, data_xyz, mask, - stream); - return 1; -} - -int roi_avg_pool_forward_wrapper(int n_roi, int b, int n, int d, - THCudaByteTensor *mask_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *points_tensor, - THCudaTensor *descriptors_tensor) { - - const long *batch_indices = - THCudaLongTensor_data(state, batch_indices_tensor); - const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor); - const float *points = THCudaTensor_data(state, points_tensor); - float *descriptors = THCudaTensor_data(state, descriptors_tensor); - - cudaStream_t stream = THCState_getCurrentStream(state); - roi_avg_pool_kernel_forward_wrapper(n_roi, b, n, d, mask, batch_indices, - points, descriptors, stream); - - return 1; -} - -int roi_avg_pool_backward_wrapper(int n_roi, int b, int n, int d, - THCudaByteTensor *mask_tensor, - THCudaLongTensor *batch_indices_tensor, - THCudaTensor *grad_descriptors_tensor, - THCudaTensor *grad_points_tensor) { - - const long *batch_indices = - THCudaLongTensor_data(state, batch_indices_tensor); - const unsigned char *mask = THCudaByteTensor_data(state, mask_tensor); - const float *grad_descriptors = - THCudaTensor_data(state, grad_descriptors_tensor); - float *grad_points = THCudaTensor_data(state, grad_points_tensor); - - cudaStream_t stream = THCState_getCurrentStream(state); - roi_avg_pool_kernel_backward_wrapper(n_roi, b, n, d, mask, batch_indices, - grad_descriptors, grad_points, stream); - - return 1; -}