summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-09-07 22:02:44 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-09-07 22:02:44 +0200
commit: c788e040f7f4e46d9f03644cadb65788fe42571e (patch)
tree: 46cfc164d5e0104174ee1b7ff1489eee2b23688d
parent: 2dd539f911dc9e53f188ed404ba95a795ee56fb6 (diff)
Added xCONVGEMM as im2col plus a batched GEMM kernel
-rw-r--r-- CHANGELOG 2
-rw-r--r-- doc/routines.md 3
-rw-r--r-- src/kernels/levelx/xconvgemm_part1.opencl 1
-rw-r--r-- src/kernels/levelx/xconvgemm_part2.opencl 1
-rw-r--r-- src/routines/levelx/xconvgemm.cpp 4
-rw-r--r-- src/routines/levelx/xconvgemm.hpp 2
6 files changed, 11 insertions, 2 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c1c639e1..53958d6f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,6 +3,8 @@ Development (next version)
- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah')
- Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
- Various minor fixes and enhancements
+- Added non-BLAS routines:
+ * SCONVGEMM/DCONVGEMM/HCONVGEMM (convolution as im2col followed by batched GEMM)
Version 1.4.1
- Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded
diff --git a/doc/routines.md b/doc/routines.md
index c5e14907..7c6a1eb9 100644
--- a/doc/routines.md
+++ b/doc/routines.md
@@ -93,8 +93,9 @@ In addition, some extra non-BLAS routines are also supported by CLBlast, classif
| xHAD | ✔ | ✔ | ✔ | ✔ | ✔ | (Hadamard product)
| xOMATCOPY | ✔ | ✔ | ✔ | ✔ | ✔ | (Out-of-place copying/transposing/scaling of matrices)
| xIM2COL | ✔ | ✔ | ✔ | ✔ | ✔ | (Image to column transform as used to express convolution as GEMM)
+| xCONVGEMM | ✔ | ✔ | - | - | ✔ | (Experimental, implemented as im2col followed by batched GEMM)
-Some less commonly used BLAS routines are not yet supported yet by CLBlast. They are xROTG, xROTMG, xROT, xROTM, xTBSV, and xTPSV.
+Some less commonly used BLAS routines are not yet supported by CLBlast. They are xROTG, xROTMG, xROT, xROTM, xTBSV, and xTPSV.
Half precision (fp16)
diff --git a/src/kernels/levelx/xconvgemm_part1.opencl b/src/kernels/levelx/xconvgemm_part1.opencl
index 6f870ec0..abdb5324 100644
--- a/src/kernels/levelx/xconvgemm_part1.opencl
+++ b/src/kernels/levelx/xconvgemm_part1.opencl
@@ -11,6 +11,7 @@
// uses parameters from the direct GEMM kernel. This is the part with the loads from memory (1/2).
// This uses "CONVGEMM_WITH_IM2COL" as a switch to select between direct convgemm or first running
// the im2col kernel to create a 'col' temporary matrix.
+// TODO: Currently only works with 'CONVGEMM_WITH_IM2COL' set
//
// =================================================================================================
diff --git a/src/kernels/levelx/xconvgemm_part2.opencl b/src/kernels/levelx/xconvgemm_part2.opencl
index 46a72711..e0ac24a0 100644
--- a/src/kernels/levelx/xconvgemm_part2.opencl
+++ b/src/kernels/levelx/xconvgemm_part2.opencl
@@ -11,6 +11,7 @@
// uses parameters from the direct GEMM kernel. This part contains the main kernel (2/2).
// This uses "CONVGEMM_WITH_IM2COL" as a switch to select between direct convgemm or first running
// the im2col kernel to create a 'col' temporary matrix.
+// TODO: Currently only works with 'CONVGEMM_WITH_IM2COL' set
//
// =================================================================================================
diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp
index 5ad39751..f26f23a7 100644
--- a/src/routines/levelx/xconvgemm.cpp
+++ b/src/routines/levelx/xconvgemm.cpp
@@ -13,6 +13,7 @@
#include <string>
#include <vector>
+#include <assert.h>
#include "routines/levelx/xconvgemm.hpp"
#include "routines/levelx/xim2col.hpp"
@@ -51,6 +52,9 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const
const Buffer<T> &kernel_buffer, const size_t kernel_offset,
const Buffer<T> &result_buffer, const size_t result_offset) {
+ // TODO: Implement single-kernel approach
+ assert(method_ == ConvGemmMethod::kWithIm2Col);
+
// Tests for a valid batch count
if (batch_count == 0) {
throw BLASError(StatusCode::kInvalidBatchCount);
diff --git a/src/routines/levelx/xconvgemm.hpp b/src/routines/levelx/xconvgemm.hpp
index ac27657f..9d11ccee 100644
--- a/src/routines/levelx/xconvgemm.hpp
+++ b/src/routines/levelx/xconvgemm.hpp
@@ -29,7 +29,7 @@ class Xconvgemm: public Routine {
// Constructor
enum class ConvGemmMethod {kWithIm2Col, kSingleKernel};
Xconvgemm(Queue &queue, EventPointer event, const std::string &name = "CONVGEMM",
- const ConvGemmMethod method = ConvGemmMethod::kSingleKernel);
+ const ConvGemmMethod method = ConvGemmMethod::kWithIm2Col);
// Templated-precision implementation of the routine
void DoConvgemm(const size_t channels, const size_t height, const size_t width,