summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author: Koichi Akabe <vbkaisetsu@gmail.com> 2018-11-12 10:12:07 +0900
committer: Koichi Akabe <vbkaisetsu@gmail.com> 2018-11-12 10:12:07 +0900
commit: 032e3b0cc00a15dd2af8b4fb82d261eb7b086e26 (patch)
tree: cdcf4d0fc342c9ff92ee7ab3f75b0cdeced46e96 /src/routines
parent: 90112618daa0d6b24ae3e53203a636d2e908dfba (diff)
Add kernel_mode option to im2col, col2im, and convgemm functions
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/levelx/xcol2im.cpp 8
-rw-r--r-- src/routines/levelx/xcol2im.hpp 3
-rw-r--r-- src/routines/levelx/xconvgemm.cpp 6
-rw-r--r-- src/routines/levelx/xconvgemm.hpp 3
-rw-r--r-- src/routines/levelx/xim2col.cpp 14
-rw-r--r-- src/routines/levelx/xim2col.hpp 3
6 files changed, 25 insertions, 12 deletions
diff --git a/src/routines/levelx/xcol2im.cpp b/src/routines/levelx/xcol2im.cpp
index 7a0c36b7..d285e5c0 100644
--- a/src/routines/levelx/xcol2im.cpp
+++ b/src/routines/levelx/xcol2im.cpp
@@ -31,13 +31,17 @@ Xcol2im<T>::Xcol2im(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-void Xcol2im<T>::DoCol2im(const size_t channels, const size_t height, const size_t width,
+void Xcol2im<T>::DoCol2im(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w, const size_t pad_h,
const size_t pad_w, const size_t stride_h, const size_t stride_w,
const size_t dilation_h, const size_t dilation_w,
const Buffer<T> &col_buffer, const size_t col_offset,
const Buffer<T> &im_buffer, const size_t im_offset) {
+ // Flip the output along kernel_h and kernel_w, or not.
+ const auto kernel_name = (kernel_mode == KernelMode::kConvolution) ? "Xcol2imKernelFlip" : "Xcol2imKernelNormal";
+
// Makes sure all dimensions are larger than zero
if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
@@ -59,7 +63,7 @@ void Xcol2im<T>::DoCol2im(const size_t channels, const size_t height, const size
EuclidGCD(static_cast<int>(stride_w), static_cast<int>(dilation_w), stride_bez_w, dilation_bez_w, gcd_w);
// Retrieves the kernel from the compiled binary
- auto kernel = Kernel(program_, "col2im");
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(height));
diff --git a/src/routines/levelx/xcol2im.hpp b/src/routines/levelx/xcol2im.hpp
index 86d68c45..522c717e 100644
--- a/src/routines/levelx/xcol2im.hpp
+++ b/src/routines/levelx/xcol2im.hpp
@@ -29,7 +29,8 @@ class Xcol2im: public Routine {
Xcol2im(Queue &queue, EventPointer event, const std::string &name = "COL2IM");
// Templated-precision implementation of the routine
- void DoCol2im(const size_t channels, const size_t height, const size_t width,
+ void DoCol2im(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w,
const size_t pad_h, const size_t pad_w,
const size_t stride_h, const size_t stride_w,
diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp
index f26f23a7..88127b0f 100644
--- a/src/routines/levelx/xconvgemm.cpp
+++ b/src/routines/levelx/xconvgemm.cpp
@@ -43,7 +43,8 @@ Xconvgemm<T>::Xconvgemm(Queue &queue, EventPointer event, const std::string &nam
// =================================================================================================
template <typename T>
-void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const size_t width,
+void Xconvgemm<T>::DoConvgemm(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w, const size_t pad_h,
const size_t pad_w, const size_t stride_h, const size_t stride_w,
const size_t dilation_h, const size_t dilation_w,
@@ -94,7 +95,8 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const
const auto col_batch_offset = batch_id * patch_size * num_patches;
auto im2col_event = Event();
auto im2col = Xim2col<T>(queue_, im2col_event.pointer());
- im2col.DoIm2col(channels, height, width, kernel_h, kernel_w,
+ im2col.DoIm2col(kernel_mode,
+ channels, height, width, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
im_buffer, im_batch_offset,
col_buffer, col_batch_offset);
diff --git a/src/routines/levelx/xconvgemm.hpp b/src/routines/levelx/xconvgemm.hpp
index 9d11ccee..20cfff60 100644
--- a/src/routines/levelx/xconvgemm.hpp
+++ b/src/routines/levelx/xconvgemm.hpp
@@ -32,7 +32,8 @@ class Xconvgemm: public Routine {
const ConvGemmMethod method = ConvGemmMethod::kWithIm2Col);
// Templated-precision implementation of the routine
- void DoConvgemm(const size_t channels, const size_t height, const size_t width,
+ void DoConvgemm(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w,
const size_t pad_h, const size_t pad_w,
const size_t stride_h, const size_t stride_w,
diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp
index 09dcc42c..0f786974 100644
--- a/src/routines/levelx/xim2col.cpp
+++ b/src/routines/levelx/xim2col.cpp
@@ -22,22 +22,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xim2col<T>::Xim2col(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy"}, PrecisionValue<T>(), {}, {
-#include "../../kernels/levelx/im2col.opencl"
- }) {
+ Routine(queue, event, name, {"Copy"}, PrecisionValue<T>(), {}, {
+ #include "../../kernels/levelx/im2col.opencl"
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size_t width,
+void Xim2col<T>::DoIm2col(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w, const size_t pad_h,
const size_t pad_w, const size_t stride_h, const size_t stride_w,
const size_t dilation_h, const size_t dilation_w,
const Buffer<T> &im_buffer, const size_t im_offset,
const Buffer<T> &col_buffer, const size_t col_offset) {
+ // Flip the output along kernel_h and kernel_w, or not.
+ const auto kernel_name = (kernel_mode == KernelMode::kConvolution) ? "Xim2colKernelFlip" : "Xim2colKernelNormal";
+
// Makes sure all dimensions are larger than zero
if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
@@ -50,7 +54,7 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size
const auto col_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;
// Retrieves the kernel from the compiled binary
- auto kernel = Kernel(program_, "im2col");
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(height));
diff --git a/src/routines/levelx/xim2col.hpp b/src/routines/levelx/xim2col.hpp
index 2c03b169..77cc32eb 100644
--- a/src/routines/levelx/xim2col.hpp
+++ b/src/routines/levelx/xim2col.hpp
@@ -29,7 +29,8 @@ class Xim2col: public Routine {
Xim2col(Queue &queue, EventPointer event, const std::string &name = "IM2COL");
// Templated-precision implementation of the routine
- void DoIm2col(const size_t channels, const size_t height, const size_t width,
+ void DoIm2col(const KernelMode kernel_mode,
+ const size_t channels, const size_t height, const size_t width,
const size_t kernel_h, const size_t kernel_w,
const size_t pad_h, const size_t pad_w,
const size_t stride_h, const size_t stride_w,