summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-11-09 08:06:13 +0100
committer GitHub <noreply@github.com> 2018-11-09 08:06:13 +0100
commit 90112618daa0d6b24ae3e53203a636d2e908dfba (patch)
tree 4d9b0782a42da5cf6e54571459996907e2a93f2b /src/routines
parent 441373c8fd1442cc4c024e59e7778b4811eb210c (diff)
parent 6f67525ea693d0761c479b060c04ce93d408beb5 (diff)
Merge pull request #331 from CNugteren/CLBlast-270-col2im
Implements col2im routine
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/levelx/xcol2im.cpp 107
-rw-r--r-- src/routines/levelx/xcol2im.hpp 45
-rw-r--r-- src/routines/levelx/xim2col.cpp 16
-rw-r--r-- src/routines/levelx/xim2col.hpp 1
-rw-r--r-- src/routines/routines.hpp 1
5 files changed, 162 insertions, 8 deletions
diff --git a/src/routines/levelx/xcol2im.cpp b/src/routines/levelx/xcol2im.cpp
new file mode 100644
index 00000000..7a0c36b7
--- /dev/null
+++ b/src/routines/levelx/xcol2im.cpp
@@ -0,0 +1,107 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xcol2im class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "routines/levelx/xcol2im.hpp"
+
+#include <string>
+#include <vector>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: forwards every argument to the Routine base-class constructor and does nothing
+// else (the body is empty). Besides queue, event and name, the base class receives the list
+// {"Copy"}, the precision value for T, an empty list, and the text of the col2im kernel file.
+// NOTE(review): {"Copy"} presumably selects the tuning parameters of the regular copy kernel, as
+// the description at the top of xcol2im.hpp states - confirm against the Routine constructor.
+template <typename T>
+Xcol2im<T>::Xcol2im(Queue &queue, EventPointer event, const std::string &name):
+ Routine(queue, event, name, {"Copy"}, PrecisionValue<T>(), {}, {
+// The kernel file is pasted in by the preprocessor as the contents of this braced list, so the
+// directive below has to stay on a line of its own.
+#include "../../kernels/levelx/col2im.opencl"
+ }) {
+}
+
+// =================================================================================================
+
+// The main routine: validates the arguments, derives the height and width of the 'col' data, and
+// launches the "col2im" kernel on the routine's queue.
+//
+// Arguments:
+//   channels, height, width     sizes forwarded to the kernel; each must be non-zero
+//   kernel_h, kernel_w          kernel sizes; each must be non-zero
+//   pad_h, pad_w                padding added on both sides of height/width (zero is allowed)
+//   stride_h, stride_w          strides; each must be non-zero (used as divisors)
+//   dilation_h, dilation_w      dilations; each must be non-zero
+//   col_buffer, col_offset      'col' buffer and offset, passed to the kernel unchanged
+//   im_buffer, im_offset        'im' buffer and offset, passed to the kernel unchanged
+//
+// Throws BLASError(StatusCode::kInvalidDimension) if any of channels, height, width, kernel_h,
+// kernel_w, stride_h, stride_w, dilation_h or dilation_w is zero.
+template <typename T>
+void Xcol2im<T>::DoCol2im(const size_t channels, const size_t height, const size_t width,
+                          const size_t kernel_h, const size_t kernel_w, const size_t pad_h,
+                          const size_t pad_w, const size_t stride_h, const size_t stride_w,
+                          const size_t dilation_h, const size_t dilation_w,
+                          const Buffer<T> &col_buffer, const size_t col_offset,
+                          const Buffer<T> &im_buffer, const size_t im_offset) {
+
+  // Makes sure all dimensions are larger than zero
+  if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
+
+  // Also rejects zero kernel sizes, strides and dilations. The strides are divisors below, the
+  // value derived from stride and dilation divides the global size at the end of this function,
+  // and 'kernel - 1' would wrap around for an unsigned zero.
+  if ((kernel_h == 0) || (kernel_w == 0) || (stride_h == 0) || (stride_w == 0) ||
+      (dilation_h == 0) || (dilation_w == 0)) {
+    throw BLASError(StatusCode::kInvalidDimension);
+  }
+
+  // Sets the height and width of the 'col' data: the padded size minus the dilated kernel extent,
+  // divided by the stride, plus one. Falls back to 1 if the dilated kernel exceeds the padded size.
+  const auto size_h = height + 2 * pad_h;
+  const auto padding_h = dilation_h * (kernel_h - 1) + 1;
+  const auto col_h = (size_h >= padding_h) ? (size_h - padding_h) / stride_h + 1 : 1;
+  const auto size_w = width + 2 * pad_w;
+  const auto padding_w = dilation_w * (kernel_w - 1) + 1;
+  const auto col_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;
+
+  // Fills in three values per direction from the stride and dilation.
+  // NOTE(review): presumably the two Bezout coefficients and the greatest common divisor, going
+  // by the variable names - confirm against the definition of EuclidGCD.
+  int stride_bez_h = 0;
+  int stride_bez_w = 0;
+  int dilation_bez_h = 0;
+  int dilation_bez_w = 0;
+  int gcd_h = 0;
+  int gcd_w = 0;
+  EuclidGCD(static_cast<int>(stride_h), static_cast<int>(dilation_h), stride_bez_h, dilation_bez_h, gcd_h);
+  EuclidGCD(static_cast<int>(stride_w), static_cast<int>(dilation_w), stride_bez_w, dilation_bez_w, gcd_w);
+
+  // Retrieves the kernel from the compiled binary
+  auto kernel = Kernel(program_, "col2im");
+
+  // Sets the kernel arguments; all sizes and offsets are passed to the kernel as 'int'
+  kernel.SetArgument(0, static_cast<int>(height));
+  kernel.SetArgument(1, static_cast<int>(width));
+  kernel.SetArgument(2, static_cast<int>(channels));
+  kernel.SetArgument(3, static_cast<int>(col_h));
+  kernel.SetArgument(4, static_cast<int>(col_w));
+  kernel.SetArgument(5, static_cast<int>(kernel_h));
+  kernel.SetArgument(6, static_cast<int>(kernel_w));
+  kernel.SetArgument(7, static_cast<int>(pad_h));
+  kernel.SetArgument(8, static_cast<int>(pad_w));
+  kernel.SetArgument(9, static_cast<int>(stride_h));
+  kernel.SetArgument(10, static_cast<int>(stride_w));
+  kernel.SetArgument(11, static_cast<int>(dilation_h));
+  kernel.SetArgument(12, static_cast<int>(dilation_w));
+  kernel.SetArgument(13, stride_bez_h);
+  kernel.SetArgument(14, stride_bez_w);
+  kernel.SetArgument(15, dilation_bez_h);
+  kernel.SetArgument(16, dilation_bez_w);
+  kernel.SetArgument(17, gcd_h);
+  kernel.SetArgument(18, gcd_w);
+  kernel.SetArgument(19, col_buffer());
+  kernel.SetArgument(20, static_cast<int>(col_offset));
+  kernel.SetArgument(21, im_buffer());
+  kernel.SetArgument(22, static_cast<int>(im_offset));
+
+  // Launches the kernel. The global size covers (width - 1) / gcd_w + 1 by (height - 1) / gcd_h + 1
+  // items per channel, each passed through Ceil together with the matching COPY_DIMX/COPY_DIMY
+  // value that is also used as the local size. The gcd values are converted to size_t explicitly;
+  // the original relied on the implicit conversion in the mixed size_t/int division.
+  const auto w_ceiled = Ceil((width - 1) / static_cast<size_t>(gcd_w) + 1, db_["COPY_DIMX"]);
+  const auto h_ceiled = Ceil((height - 1) / static_cast<size_t>(gcd_h) + 1, db_["COPY_DIMY"]);
+  const auto global = std::vector<size_t>{w_ceiled, h_ceiled * channels};
+  const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]};
+  RunKernel(kernel, queue_, device_, global, local, event_);
+}
+
+// =================================================================================================
+
+// Compiles the templated class: the member functions of Xcol2im are defined in this source file,
+// so the class is explicitly instantiated here once for every precision listed below.
+// NOTE(review): half, float2 and double2 are declared elsewhere in the project - presumably the
+// half-precision and the two complex types; confirm.
+template class Xcol2im<half>;
+template class Xcol2im<float>;
+template class Xcol2im<double>;
+template class Xcol2im<float2>;
+template class Xcol2im<double2>;
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/routines/levelx/xcol2im.hpp b/src/routines/levelx/xcol2im.hpp
new file mode 100644
index 00000000..86d68c45
--- /dev/null
+++ b/src/routines/levelx/xcol2im.hpp
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xcol2im routine. The precision is implemented using a template argument.
+// Uses the tuning parameters from the regular copy kernel.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XCOL2IM_H_
+#define CLBLAST_ROUTINES_XCOL2IM_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Routine class for col2im, templated on the precision T. It derives from Routine and declares
+// only a constructor and the DoCol2im entry point; it adds no data members of its own.
+template <typename T>
+class Xcol2im: public Routine {
+ public:
+
+  // Builds the routine for the given queue and event; the routine name defaults to "COL2IM"
+  Xcol2im(Queue &queue, EventPointer event, const std::string &name = "COL2IM");
+
+  // Runs col2im in precision T. Takes the channel count and height/width, the kernel size,
+  // padding, stride and dilation (each as an _h/_w pair), followed by the 'col' and 'im' buffers
+  // with their offsets.
+  void DoCol2im(const size_t channels,
+                const size_t height,
+                const size_t width,
+                const size_t kernel_h,
+                const size_t kernel_w,
+                const size_t pad_h,
+                const size_t pad_w,
+                const size_t stride_h,
+                const size_t stride_w,
+                const size_t dilation_h,
+                const size_t dilation_w,
+                const Buffer<T> &col_buffer,
+                const size_t col_offset,
+                const Buffer<T> &im_buffer,
+                const size_t im_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XCOL2IM_H_
+#endif
diff --git a/src/routines/levelx/xim2col.cpp b/src/routines/levelx/xim2col.cpp
index dfbb4bb5..09dcc42c 100644
--- a/src/routines/levelx/xim2col.cpp
+++ b/src/routines/levelx/xim2col.cpp
@@ -41,23 +41,23 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size
// Makes sure all dimensions are larger than zero
if ((channels == 0) || (height == 0) || (width == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
- // Sets the output height and width
+ // Sets the height and width of the 'col' result
const auto size_h = height + 2 * pad_h;
const auto padding_h = dilation_h * (kernel_h - 1) + 1;
- const auto output_h = (size_h >= padding_h) ? (size_h - padding_h) / stride_h + 1 : 1;
+ const auto col_h = (size_h >= padding_h) ? (size_h - padding_h) / stride_h + 1 : 1;
const auto size_w = width + 2 * pad_w;
const auto padding_w = dilation_w * (kernel_w - 1) + 1;
- const auto output_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;
+ const auto col_w = (size_w >= padding_w) ? (size_w - padding_w) / stride_w + 1 : 1;
- // Retrieves the Xcopy kernel from the compiled binary
+ // Retrieves the kernel from the compiled binary
auto kernel = Kernel(program_, "im2col");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(height));
kernel.SetArgument(1, static_cast<int>(width));
kernel.SetArgument(2, static_cast<int>(channels));
- kernel.SetArgument(3, static_cast<int>(output_h));
- kernel.SetArgument(4, static_cast<int>(output_w));
+ kernel.SetArgument(3, static_cast<int>(col_h));
+ kernel.SetArgument(4, static_cast<int>(col_w));
kernel.SetArgument(5, static_cast<int>(kernel_h));
kernel.SetArgument(6, static_cast<int>(kernel_w));
kernel.SetArgument(7, static_cast<int>(pad_h));
@@ -72,8 +72,8 @@ void Xim2col<T>::DoIm2col(const size_t channels, const size_t height, const size
kernel.SetArgument(16, static_cast<int>(col_offset));
// Launches the kernel
- const auto w_ceiled = Ceil(output_w, db_["COPY_DIMX"]);
- const auto h_ceiled = Ceil(output_h, db_["COPY_DIMY"]);
+ const auto w_ceiled = Ceil(col_w, db_["COPY_DIMX"]);
+ const auto h_ceiled = Ceil(col_h, db_["COPY_DIMY"]);
const auto global = std::vector<size_t>{w_ceiled, h_ceiled * channels};
const auto local = std::vector<size_t>{db_["COPY_DIMX"], db_["COPY_DIMY"]};
RunKernel(kernel, queue_, device_, global, local, event_);
diff --git a/src/routines/levelx/xim2col.hpp b/src/routines/levelx/xim2col.hpp
index 4448b54e..2c03b169 100644
--- a/src/routines/levelx/xim2col.hpp
+++ b/src/routines/levelx/xim2col.hpp
@@ -8,6 +8,7 @@
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xim2col routine. The precision is implemented using a template argument.
+// Uses the tuning parameters from the regular copy kernel.
//
// =================================================================================================
diff --git a/src/routines/routines.hpp b/src/routines/routines.hpp
index e080ed47..95475470 100644
--- a/src/routines/routines.hpp
+++ b/src/routines/routines.hpp
@@ -70,6 +70,7 @@
#include "routines/levelx/xhad.hpp"
#include "routines/levelx/xomatcopy.hpp"
#include "routines/levelx/xim2col.hpp"
+#include "routines/levelx/xcol2im.hpp"
#include "routines/levelx/xconvgemm.hpp"
#include "routines/levelx/xaxpybatched.hpp"
#include "routines/levelx/xgemmbatched.hpp"