summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-08-31 20:34:10 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-08-31 20:34:10 +0200
commit: 6194d43efba30aac90a64676e7770f020e4a5588 (patch)
tree: 5016ed0682d3aff0674f4ebf429a8f6338b0cc89
parent: 54e160cd88d0a65fc72270b2414530d0dcc56ad1 (diff)
Fixed a bug in im2col confusing first and second workgroup size; made im2col kernel 2d instead of 3d
-rw-r--r--  src/kernels/levelx/im2col.opencl  8
-rw-r--r--  src/routines/levelx/xim2col.cpp   8
-rw-r--r--  test/routines/levelx/xim2col.hpp  2
3 files changed, 9 insertions, 9 deletions
diff --git a/src/kernels/levelx/im2col.opencl b/src/kernels/levelx/im2col.opencl
index a141db41..c3a5e419 100644
--- a/src/kernels/levelx/im2col.opencl
+++ b/src/kernels/levelx/im2col.opencl
@@ -17,10 +17,10 @@ R"(
// Work-group size parameters re-used from the 'copy' kernel
#ifndef COPY_DIMX
- #define COPY_DIMX 8 // Local workgroup size in the first dimension (x)
+ #define COPY_DIMX 8 // Local workgroup size in the first dimension (w)
#endif
#ifndef COPY_DIMY
- #define COPY_DIMY 8 // Local workgroup size in the second dimension (y)
+ #define COPY_DIMY 8 // Local workgroup size in the second dimension (h)
#endif
// =================================================================================================
@@ -37,8 +37,8 @@ void im2col(const int input_h, const int input_w,
// Thread IDs
const int w_id = get_global_id(0); // image width, max 'output_w'
- const int h_id = get_global_id(1); // image height, max 'output_h'
- const int c_id = get_global_id(2); // input channels
+ const int h_id = get_global_id(1) % output_h; // image height, max 'output_h'
+ const int c_id = get_global_id(1) / output_h; // input channels
if (h_id < output_h && w_id < output_w) {
#pragma unroll
diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp
index 51171eb5..527695c0 100644
--- a/src/routines/levelx/xim2col.cpp
+++ b/src/routines/levelx/xim2col.cpp
@@ -71,10 +71,10 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size
kernel.SetArgument(15, static_cast<int>(col_offset));
// Launches the kernel
- const auto w_ceiled = Ceil(output_w, db_["COPY_DIMY"]);
- const auto h_ceiled = Ceil(output_h, db_["COPY_DIMX"]);
- const auto global = std::vector<size_t>{w_ceiled, h_ceiled, channels};
- const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"], 1};
+ const auto w_ceiled = Ceil(output_w, db_["COPY_DIMX"]);
+ const auto h_ceiled = Ceil(output_h, db_["COPY_DIMY"]);
+ const auto global = std::vector<size_t>{w_ceiled, h_ceiled * channels};
+ const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]};
RunKernel(kernel, queue_, device_, global, local, event_);
}
diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp
index 59be8156..4124190f 100644
--- a/test/routines/levelx/xim2col.hpp
+++ b/test/routines/levelx/xim2col.hpp
@@ -173,7 +173,7 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
}
}
}
- return StatusCode ::kSuccess;
+ return StatusCode::kSuccess;
}
// =================================================================================================