summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-08-31 20:34:10 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-08-31 20:34:10 +0200
commit: 6194d43efba30aac90a64676e7770f020e4a5588 (patch)
tree: 5016ed0682d3aff0674f4ebf429a8f6338b0cc89 /src
parent: 54e160cd88d0a65fc72270b2414530d0dcc56ad1 (diff)
Fixed a bug in im2col confusing first and second workgroup size; made im2col kernel 2d instead of 3d
Diffstat (limited to 'src')
-rw-r--r-- src/kernels/levelx/im2col.opencl 8
-rw-r--r-- src/routines/levelx/xim2col.cpp 8
2 files changed, 8 insertions, 8 deletions
diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl
index a141db41..c3a5e419 100644
--- a/src/kernels/levelx/im2col.opencl
+++ b/src/kernels/levelx/im2col.opencl
@@ -17,10 +17,10 @@ R"(
// Work-group size parameters re-used from the 'copy' kernel
#ifndef COPY_DIMX
- #define COPY_DIMX 8 // Local workgroup size in the first dimension (x)
+ #define COPY_DIMX 8 // Local workgroup size in the first dimension (w)
#endif
#ifndef COPY_DIMY
- #define COPY_DIMY 8 // Local workgroup size in the second dimension (y)
+ #define COPY_DIMY 8 // Local workgroup size in the second dimension (h)
#endif
// =================================================================================================
@@ -37,8 +37,8 @@ void im2col(const int input_h, const int input_w,
// Thread IDs
const int w_id = get_global_id(0); // image width, max 'output_w'
- const int h_id = get_global_id(1); // image height, max 'output_h'
- const int c_id = get_global_id(2); // input channels
+ const int h_id = get_global_id(1) % output_h; // image height, max 'output_h'
+ const int c_id = get_global_id(1) / output_h; // input channels
if (h_id < output_h && w_id < output_w) {
#pragma unroll
diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp
index 51171eb5..527695c0 100644
--- a/src/routines/levelx/xim2col.cpp
+++ b/src/routines/levelx/xim2col.cpp
@@ -71,10 +71,10 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size
kernel.SetArgument(15, static_cast<int>(col_offset));
// Launches the kernel
- const auto w_ceiled = Ceil(output_w, db_["COPY_DIMY"]);
- const auto h_ceiled = Ceil(output_h, db_["COPY_DIMX"]);
- const auto global = std::vector<size_t>{w_ceiled, h_ceiled, channels};
- const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"], 1};
+ const auto w_ceiled = Ceil(output_w, db_["COPY_DIMX"]);
+ const auto h_ceiled = Ceil(output_h, db_["COPY_DIMY"]);
+ const auto global = std::vector<size_t>{w_ceiled, h_ceiled * channels};
+ const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]};
RunKernel(kernel, queue_, device_, global, local, event_);
}