summaryrefslogtreecommitdiff
path: root/src/kernels
diff options
context:
space:
mode:
authorKoichi Akabe <vbkaisetsu@gmail.com>2018-11-12 10:12:07 +0900
committerKoichi Akabe <vbkaisetsu@gmail.com>2018-11-12 10:12:07 +0900
commit032e3b0cc00a15dd2af8b4fb82d261eb7b086e26 (patch)
treecdcf4d0fc342c9ff92ee7ab3f75b0cdeced46e96 /src/kernels
parent90112618daa0d6b24ae3e53203a636d2e908dfba (diff)
Add kernel_mode option to im2col, col2im, and convgemm functions
Diffstat (limited to 'src/kernels')
-rw-r--r--src/kernels/levelx/col2im.opencl73
-rw-r--r--src/kernels/levelx/im2col.opencl59
2 files changed, 109 insertions, 23 deletions
diff --git a/src/kernels/levelx/col2im.opencl b/src/kernels/levelx/col2im.opencl
index a37db24f..484a7a98 100644
--- a/src/kernels/levelx/col2im.opencl
+++ b/src/kernels/levelx/col2im.opencl
@@ -28,18 +28,20 @@ inline int grid_ceil(const int x, const int step) {
return x > 0 ? ((x - 1) / step + 1) * step : x / step * step;
}
+// Main body of the kernel
__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
-void col2im(const int input_h, const int input_w, const int channels,
- const int output_h, const int output_w,
- const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w,
- const int stride_h, const int stride_w,
- const int dilation_h, const int dilation_w,
- const int stride_bez_h, const int stride_bez_w,
- const int dilation_bez_h, const int dilation_bez_w,
- const int gcd_h, const int gcd_w,
- const __global real* restrict col_buffer, const int col_offset,
- __global real* im_buffer, const int im_offset) {
+INLINE_FUNC void Xcol2im(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int stride_bez_h, const int stride_bez_w,
+ const int dilation_bez_h, const int dilation_bez_w,
+ const int gcd_h, const int gcd_w,
+ const bool kernel_flip,
+ const __global real* restrict col_buffer, const int col_offset,
+ __global real* im_buffer, const int im_offset) {
const int input_h_scaled = (input_h - 1) / gcd_h + 1;
@@ -71,8 +73,9 @@ void col2im(const int input_h, const int input_w, const int channels,
const int kw_id = -tw / dilation_w + dilation_bez_w * gcd_scale_w;
const int h_id = th / stride_h + stride_bez_h * gcd_scale_h;
const int w_id = tw / stride_w + stride_bez_w * gcd_scale_w;
-
- const int kernel_index = kw_id + kernel_w * kh_id;
+ const int kernel_index = (kernel_flip)
+ ? kernel_h * kernel_w - kw_id - kernel_w * kh_id - 1
+ : kw_id + kernel_w * kh_id;
const int patch_index = w_id + output_w * h_id;
const int output_index = patch_index + kernel_index * output_w * output_h +
c_id * output_w * output_h * kernel_h * kernel_w;
@@ -89,6 +92,50 @@ void col2im(const int input_h, const int input_w, const int channels,
// =================================================================================================
+// Kernel flip version of the Xcol2im kernel (for convolution)
+__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
+void Xcol2imKernelFlip(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int stride_bez_h, const int stride_bez_w,
+ const int dilation_bez_h, const int dilation_bez_w,
+ const int gcd_h, const int gcd_w,
+ const __global real* restrict col_buffer, const int col_offset,
+ __global real* im_buffer, const int im_offset) {
+ const bool kernel_flip = true;
+ Xcol2im(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+ stride_bez_h, stride_bez_w, dilation_bez_h, dilation_bez_w, gcd_h, gcd_w,
+ kernel_flip,
+ col_buffer, col_offset, im_buffer, im_offset);
+}
+
+// Normal version of the Xcol2im kernel (for cross-correlation)
+__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
+void Xcol2imKernelNormal(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const int stride_bez_h, const int stride_bez_w,
+ const int dilation_bez_h, const int dilation_bez_w,
+ const int gcd_h, const int gcd_w,
+ const __global real* restrict col_buffer, const int col_offset,
+ __global real* im_buffer, const int im_offset) {
+ const bool kernel_flip = false;
+ Xcol2im(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+ stride_bez_h, stride_bez_w, dilation_bez_h, dilation_bez_w, gcd_h, gcd_w,
+ kernel_flip,
+ col_buffer, col_offset, im_buffer, im_offset);
+}
+
+// =================================================================================================
+
// End of the C++11 raw string literal
)"
diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl
index 301e076b..5db4cb5f 100644
--- a/src/kernels/levelx/im2col.opencl
+++ b/src/kernels/levelx/im2col.opencl
@@ -25,15 +25,16 @@ R"(
// =================================================================================================
-__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
-void im2col(const int input_h, const int input_w, const int channels,
- const int output_h, const int output_w,
- const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w,
- const int stride_h, const int stride_w,
- const int dilation_h, const int dilation_w,
- const __global real* restrict im_buffer, const int im_offset,
- __global real* col_buffer, const int col_offset) {
+// Main body of the kernel
+INLINE_FUNC void Xim2col(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const bool kernel_flip,
+ const __global real* restrict im_buffer, const int im_offset,
+ __global real* col_buffer, const int col_offset) {
// Thread IDs
const int w_id = get_global_id(0); // image width, max 'output_w'
@@ -58,7 +59,9 @@ void im2col(const int input_h, const int input_w, const int channels,
}
// Sets the output value
- const int kernel_index = kw_id + kernel_w * kh_id;
+ const int kernel_index = (kernel_flip)
+ ? kernel_h * kernel_w - kw_id - kernel_w * kh_id - 1
+ : kw_id + kernel_w * kh_id;
const int patch_index = w_id + output_w * h_id;
const int output_index = patch_index + kernel_index * output_w * output_h +
c_id * output_w * output_h * kernel_h * kernel_w;
@@ -70,6 +73,42 @@ void im2col(const int input_h, const int input_w, const int channels,
// =================================================================================================
+// Kernel flip version of the Xim2col kernel (for convolution)
+__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
+void Xim2colKernelFlip(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const __global real* restrict im_buffer, const int im_offset,
+ __global real* col_buffer, const int col_offset) {
+ const bool kernel_flip = true;
+ Xim2col(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+ kernel_flip,
+ im_buffer, im_offset, col_buffer, col_offset);
+}
+
+// Normal version of the Xim2col kernel (for cross-correlation)
+__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
+void Xim2colKernelNormal(const int input_h, const int input_w, const int channels,
+ const int output_h, const int output_w,
+ const int kernel_h, const int kernel_w,
+ const int pad_h, const int pad_w,
+ const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
+ const __global real* restrict im_buffer, const int im_offset,
+ __global real* col_buffer, const int col_offset) {
+ const bool kernel_flip = false;
+ Xim2col(input_h, input_w, channels, output_h, output_w, kernel_h, kernel_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
+ kernel_flip,
+ im_buffer, im_offset, col_buffer, col_offset);
+}
+
+// =================================================================================================
+
// End of the C++11 raw string literal
)"