diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-08-31 20:34:10 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-08-31 20:34:10 +0200 |
commit | 6194d43efba30aac90a64676e7770f020e4a5588 (patch) | |
tree | 5016ed0682d3aff0674f4ebf429a8f6338b0cc89 /src | |
parent | 54e160cd88d0a65fc72270b2414530d0dcc56ad1 (diff) |
Fixed a bug in im2col that confused the first and second workgroup sizes; made the im2col kernel 2D instead of 3D
Diffstat (limited to 'src')
-rw-r--r-- | src/kernels/levelx/im2col.opencl | 8 | ||||
-rw-r--r-- | src/routines/levelx/xim2col.cpp | 8 |
2 files changed, 8 insertions, 8 deletions
diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl index a141db41..c3a5e419 100644 --- a/src/kernels/levelx/im2col.opencl +++ b/src/kernels/levelx/im2col.opencl @@ -17,10 +17,10 @@ R"( // Work-group size parameters re-used from the 'copy' kernel #ifndef COPY_DIMX - #define COPY_DIMX 8 // Local workgroup size in the first dimension (x) + #define COPY_DIMX 8 // Local workgroup size in the first dimension (w) #endif #ifndef COPY_DIMY - #define COPY_DIMY 8 // Local workgroup size in the second dimension (y) + #define COPY_DIMY 8 // Local workgroup size in the second dimension (h) #endif // ================================================================================================= @@ -37,8 +37,8 @@ void im2col(const int input_h, const int input_w, // Thread IDs const int w_id = get_global_id(0); // image width, max 'output_w' - const int h_id = get_global_id(1); // image height, max 'output_h' - const int c_id = get_global_id(2); // input channels + const int h_id = get_global_id(1) % output_h; // image height, max 'output_h' + const int c_id = get_global_id(1) / output_h; // input channels if (h_id < output_h && w_id < output_w) { #pragma unroll diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp index 51171eb5..527695c0 100644 --- a/src/routines/levelx/xim2col.cpp +++ b/src/routines/levelx/xim2col.cpp @@ -71,10 +71,10 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size kernel.SetArgument(15, static_cast<int>(col_offset)); // Launches the kernel - const auto w_ceiled = Ceil(output_w, db_["COPY_DIMY"]); - const auto h_ceiled = Ceil(output_h, db_["COPY_DIMX"]); - const auto global = std::vector<size_t>{w_ceiled, h_ceiled, channels}; - const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"], 1}; + const auto w_ceiled = Ceil(output_w, db_["COPY_DIMX"]); + const auto h_ceiled = Ceil(output_h, db_["COPY_DIMY"]); + const auto global = std::vector<size_t>{w_ceiled, 
h_ceiled * channels}; + const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]}; RunKernel(kernel, queue_, device_, global, local, event_); } |