diff options
Diffstat (limited to 'src/routines')
-rw-r--r-- | src/routines/levelx/xcol2im.cpp | 8 | ||||
-rw-r--r-- | src/routines/levelx/xcol2im.hpp | 3 | ||||
-rw-r--r-- | src/routines/levelx/xconvgemm.cpp | 6 | ||||
-rw-r--r-- | src/routines/levelx/xconvgemm.hpp | 3 | ||||
-rw-r--r-- | src/routines/levelx/xim2col.cpp | 14 | ||||
-rw-r--r-- | src/routines/levelx/xim2col.hpp | 3 |
6 files changed, 25 insertions, 12 deletions
diff --git a/src/routines/levelx/xcol2im.cpp b/src/routines/levelx/xcol2im.cpp index 7a0c36b7..d285e5c0 100644 --- a/src/routines/levelx/xcol2im.cpp +++ b/src/routines/levelx/xcol2im.cpp @@ -31,13 +31,17 @@ Xcol2im<T>::Xcol2im(Queue &queue, EventPointer event, const std::string &name): // The main routine template <typename T> -void Xcol2im<T>::DoCol2im(const size_t channels, const size_t height, const size_t width, +void Xcol2im<T>::DoCol2im(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const Buffer<T> &col_buffer, const size_t col_offset, const Buffer<T> &im_buffer, const size_t im_offset) { + // Flip the output along kernel_h and kernel_w, or not. + const auto kernel_name = (kernel_mode == KernelMode::kConvolution) ? "Xcol2imKernelFlip" : "Xcol2imKernelNormal"; + // Makes sure all dimensions are larger than zero if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); } @@ -59,7 +63,7 @@ void Xcol2im<T>::DoCol2im(const size_t channels, const size_t height, const size EuclidGCD(static_cast<int>(stride_w), static_cast<int>(dilation_w), stride_bez_w, dilation_bez_w, gcd_w); // Retrieves the kernel from the compiled binary - auto kernel = Kernel(program_, "col2im"); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(height)); diff --git a/src/routines/levelx/xcol2im.hpp b/src/routines/levelx/xcol2im.hpp index 86d68c45..522c717e 100644 --- a/src/routines/levelx/xcol2im.hpp +++ b/src/routines/levelx/xcol2im.hpp @@ -29,7 +29,8 @@ class Xcol2im: public Routine { Xcol2im(Queue &queue, EventPointer event, const std::string &name = "COL2IM"); // Templated-precision implementation of the routine - void DoCol2im(const size_t channels, const size_t height, const size_t width, + void DoCol2im(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp index f26f23a7..88127b0f 100644 --- a/src/routines/levelx/xconvgemm.cpp +++ b/src/routines/levelx/xconvgemm.cpp @@ -43,7 +43,8 @@ Xconvgemm<T>::Xconvgemm(Queue &queue, EventPointer event, const std::string &nam // ================================================================================================= template <typename T> -void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const size_t width, +void Xconvgemm<T>::DoConvgemm(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, @@ -94,7 +95,8 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const const auto col_batch_offset = batch_id * patch_size * num_patches; auto im2col_event = Event(); auto im2col = Xim2col<T>(queue_, im2col_event.pointer()); - im2col.DoIm2col(channels, height, width, kernel_h, kernel_w, + im2col.DoIm2col(kernel_mode, + channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, im_buffer, im_batch_offset, col_buffer, col_batch_offset); diff --git a/src/routines/levelx/xconvgemm.hpp b/src/routines/levelx/xconvgemm.hpp index 9d11ccee..20cfff60 100644 --- a/src/routines/levelx/xconvgemm.hpp +++ b/src/routines/levelx/xconvgemm.hpp @@ -32,7 +32,8 @@ class Xconvgemm: public Routine { const ConvGemmMethod method = ConvGemmMethod::kWithIm2Col); // Templated-precision implementation of the routine - void DoConvgemm(const size_t channels, const size_t height, const size_t width, + void DoConvgemm(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp index 09dcc42c..0f786974 100644 --- a/src/routines/levelx/xim2col.cpp +++ b/src/routines/levelx/xim2col.cpp @@ -22,22 +22,26 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xim2col<T>::Xim2col(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy"}, PrecisionValue<T>(), {}, { -#include "../../kernels/levelx/im2col.opencl" - }) { + Routine(queue, event, name, {"Copy"}, PrecisionValue<T>(), {}, { + #include "../../kernels/levelx/im2col.opencl" + }) { } // ================================================================================================= // The main routine template <typename T> -void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size_t width, +void Xim2col<T>::DoIm2col(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const Buffer<T> &im_buffer, const size_t im_offset, const Buffer<T> &col_buffer, const size_t col_offset) { + // Flip the output along kernel_h and kernel_w, or not. + const auto kernel_name = (kernel_mode == KernelMode::kConvolution) ? "Xim2colKernelFlip" : "Xim2colKernelNormal"; + // Makes sure all dimensions are larger than zero if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); } @@ -50,7 +54,7 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size const auto col_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1; // Retrieves the kernel from the compiled binary - auto kernel = Kernel(program_, "im2col"); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(height)); diff --git a/src/routines/levelx/xim2col.hpp b/src/routines/levelx/xim2col.hpp index 2c03b169..77cc32eb 100644 --- a/src/routines/levelx/xim2col.hpp +++ b/src/routines/levelx/xim2col.hpp @@ -29,7 +29,8 @@ class Xim2col: public Routine { Xim2col(Queue &queue, EventPointer event, const std::string &name = "IM2COL"); // Templated-precision implementation of the routine - void DoIm2col(const size_t channels, const size_t height, const size_t width, + void DoIm2col(const KernelMode kernel_mode, + const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, |