diff options
author | Koichi Akabe <vbkaisetsu@gmail.com> | 2018-11-12 10:12:07 +0900 |
---|---|---|
committer | Koichi Akabe <vbkaisetsu@gmail.com> | 2018-11-12 10:12:07 +0900 |
commit | 032e3b0cc00a15dd2af8b4fb82d261eb7b086e26 (patch) | |
tree | cdcf4d0fc342c9ff92ee7ab3f75b0cdeced46e96 /src/kernels | |
parent | 90112618daa0d6b24ae3e53203a636d2e908dfba (diff) |
Add kernel_mode option to im2col, col2im, and convgemm functions
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/levelx/col2im.opencl | 73 | ||||
-rw-r--r-- | src/kernels/levelx/im2col.opencl | 59 |
2 files changed, 109 insertions, 23 deletions
diff --git a/src/kernels/levelx/col2im.opencl b/src/kernels/levelx/col2im.opencl index a37db24f..484a7a98 100644 --- a/src/kernels/levelx/col2im.opencl +++ b/src/kernels/levelx/col2im.opencl @@ -28,18 +28,20 @@ inline int grid_ceil(const int x, const int step) { return x > 0 ? ((x - 1) / step + 1) * step : x / step * step; } +// Main body of the kernel __kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) -void col2im(const int input_h, const int input_w, const int channels, - const int output_h, const int output_w, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int stride_bez_h, const int stride_bez_w, - const int dilation_bez_h, const int dilation_bez_w, - const int gcd_h, const int gcd_w, - const __global real* restrict col_buffer, const int col_offset, - __global real* im_buffer, const int im_offset) { +INLINE_FUNC void Xcol2im(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int stride_bez_h, const int stride_bez_w, + const int dilation_bez_h, const int dilation_bez_w, + const int gcd_h, const int gcd_w, + const bool kernel_flip, + const __global real* restrict col_buffer, const int col_offset, + __global real* im_buffer, const int im_offset) { const int input_h_scaled = (input_h - 1) / gcd_h + 1; @@ -71,8 +73,9 @@ void col2im(const int input_h, const int input_w, const int channels, const int kw_id = -tw / dilation_w + dilation_bez_w * gcd_scale_w; const int h_id = th / stride_h + stride_bez_h * gcd_scale_h; const int w_id = tw / stride_w + stride_bez_w * gcd_scale_w; - - const int kernel_index = kw_id + kernel_w * kh_id; + const int kernel_index = (kernel_flip) + ? kernel_h * kernel_w - kw_id - kernel_w * kh_id - 1 + : kw_id + kernel_w * kh_id; const int patch_index = w_id + output_w * h_id; const int output_index = patch_index + kernel_index * output_w * output_h + c_id * output_w * output_h * kernel_h * kernel_w; @@ -89,6 +92,50 @@ void col2im(const int input_h, const int input_w, const int channels, // ================================================================================================= +// Kernel flip version of the Xcol2im kernel (for convolution) +__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) +void Xcol2imKernelFlip(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int stride_bez_h, const int stride_bez_w, + const int dilation_bez_h, const int dilation_bez_w, + const int gcd_h, const int gcd_w, + const __global real* restrict col_buffer, const int col_offset, + __global real* im_buffer, const int im_offset) { + const bool kernel_flip = true; + Xcol2im(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + stride_bez_h, stride_bez_w, dilation_bez_h, dilation_bez_w, gcd_h, gcd_w, + kernel_flip, + col_buffer, col_offset, im_buffer, im_offset); +} + +// Normal version of the Xcol2im kernel (for cross-correlation) +__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) +void Xcol2imKernelNormal(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const int stride_bez_h, const int stride_bez_w, + const int dilation_bez_h, const int dilation_bez_w, + const int gcd_h, const int gcd_w, + const __global real* restrict col_buffer, const int col_offset, + __global real* im_buffer, const int im_offset) { + const bool kernel_flip = false; + Xcol2im(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + stride_bez_h, stride_bez_w, dilation_bez_h, dilation_bez_w, gcd_h, gcd_w, + kernel_flip, + col_buffer, col_offset, im_buffer, im_offset); +} + +// ================================================================================================= + // End of the C++11 raw string literal )" diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl index 301e076b..5db4cb5f 100644 --- a/src/kernels/levelx/im2col.opencl +++ b/src/kernels/levelx/im2col.opencl @@ -25,15 +25,16 @@ R"( // ================================================================================================= -__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) -void im2col(const int input_h, const int input_w, const int channels, - const int output_h, const int output_w, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const __global real* restrict im_buffer, const int im_offset, - __global real* col_buffer, const int col_offset) { +// Main body of the kernel +INLINE_FUNC void Xim2col(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const bool kernel_flip, + const __global real* restrict im_buffer, const int im_offset, + __global real* col_buffer, const int col_offset) { // Thread IDs const int w_id = get_global_id(0); // image width, max 'output_w' @@ -58,7 +59,9 @@ void im2col(const int input_h, const int input_w, const int channels, } // Sets the output value - const int kernel_index = kw_id + kernel_w * kh_id; + const int kernel_index = (kernel_flip) + ? kernel_h * kernel_w - kw_id - kernel_w * kh_id - 1 + : kw_id + kernel_w * kh_id; const int patch_index = w_id + output_w * h_id; const int output_index = patch_index + kernel_index * output_w * output_h + c_id * output_w * output_h * kernel_h * kernel_w; @@ -70,6 +73,42 @@ void im2col(const int input_h, const int input_w, const int channels, // ================================================================================================= +// Kernel flip version of the Xim2col kernel (for convolution) +__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) +void Xim2colKernelFlip(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const __global real* restrict im_buffer, const int im_offset, + __global real* col_buffer, const int col_offset) { + const bool kernel_flip = true; + Xim2col(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + kernel_flip, + im_buffer, im_offset, col_buffer, col_offset); +} + +// Normal version of the Xim2col kernel (for cross-correlation) +__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) +void Xim2colKernelNormal(const int input_h, const int input_w, const int channels, + const int output_h, const int output_w, + const int kernel_h, const int kernel_w, + const int pad_h, const int pad_w, + const int stride_h, const int stride_w, + const int dilation_h, const int dilation_w, + const __global real* restrict im_buffer, const int im_offset, + __global real* col_buffer, const int col_offset) { + const bool kernel_flip = false; + Xim2col(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w, + pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, + kernel_flip, + im_buffer, im_offset, col_buffer, col_offset); +} + +// ================================================================================================= + // End of the C++11 raw string literal )" |