From 4d9d03ba512fb404537180a42cf17fb3e10a8033 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 24 Aug 2017 21:11:12 +0200 Subject: Completed im2col implementation --- src/kernels/levelx/im2col.opencl | 17 +++++++++++------ src/routines/levelx/xim2col.cpp | 10 +++++----- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl index 3f10881f..a141db41 100644 --- a/src/kernels/levelx/im2col.opencl +++ b/src/kernels/levelx/im2col.opencl @@ -15,12 +15,17 @@ // literal). Comment-out this line for syntax-highlighting when developing. R"( -// ================================================================================================= +// Work-group size parameters re-used from the 'copy' kernel +#ifndef COPY_DIMX + #define COPY_DIMX 8 // Local workgroup size in the first dimension (x) +#endif +#ifndef COPY_DIMY + #define COPY_DIMY 8 // Local workgroup size in the second dimension (y) +#endif -#define WGS1 16 -#define WGS2 16 +// ================================================================================================= -__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +__kernel __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1))) void im2col(const int input_h, const int input_w, const int output_h, const int output_w, const int kernel_h, const int kernel_w, @@ -31,8 +36,8 @@ void im2col(const int input_h, const int input_w, __global real* col_buffer, const int col_offset) { // Thread IDs - const int h_id = get_global_id(0); // image height, max 'output_h' - const int w_id = get_global_id(1); // image width, max 'output_w' + const int w_id = get_global_id(0); // image width, max 'output_w' + const int h_id = get_global_id(1); // image height, max 'output_h' const int c_id = get_global_id(2); // input channels if (h_id < output_h && w_id < output_w) { diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp index 10c9c10c..51171eb5 100644 --- a/src/routines/levelx/xim2col.cpp +++ b/src/routines/levelx/xim2col.cpp @@ -22,7 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xim2col::Xim2col(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {}, PrecisionValue(), {}, { + Routine(queue, event, name, {"Copy"}, PrecisionValue(), {}, { #include "../../kernels/levelx/im2col.opencl" }) { } @@ -71,10 +71,10 @@ void Xim2col::DoIm2col(const size_t channels, const size_t height, const size kernel.SetArgument(15, static_cast(col_offset)); // Launches the kernel - const auto h_ceiled = Ceil(output_h, 16); - const auto w_ceiled = Ceil(output_w, 16); - auto global = std::vector{h_ceiled, w_ceiled, channels}; - auto local = std::vector{16, 16, 1}; + const auto w_ceiled = Ceil(output_w, db_["COPY_DIMY"]); + const auto h_ceiled = Ceil(output_h, db_["COPY_DIMX"]); + const auto global = std::vector{w_ceiled, h_ceiled, channels}; + const auto local = std::vector{db_["COPY_DIMX"], db_["COPY_DIMY"], 1}; RunKernel(kernel, queue_, device_, global, local, event_); } -- cgit v1.2.3