diff options
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | doc/routines.md | 3 | ||||
-rw-r--r-- | src/kernels/levelx/xconvgemm_part1.opencl | 1 | ||||
-rw-r--r-- | src/kernels/levelx/xconvgemm_part2.opencl | 1 | ||||
-rw-r--r-- | src/routines/levelx/xconvgemm.cpp | 4 | ||||
-rw-r--r-- | src/routines/levelx/xconvgemm.hpp | 2 |
6 files changed, 11 insertions, 2 deletions
@@ -3,6 +3,8 @@ Development (next version) - Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah') - Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel - Various minor fixes and enhancements +- Added non-BLAS routines: + * SCONVGEMM/DCONVGEMM/HCONVGEMM (convolution as im2col followed by batched GEMM) Version 1.4.1 - Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded diff --git a/doc/routines.md b/doc/routines.md index c5e14907..7c6a1eb9 100644 --- a/doc/routines.md +++ b/doc/routines.md @@ -93,8 +93,9 @@ In addition, some extra non-BLAS routines are also supported by CLBlast, classif | xHAD | ✔ | ✔ | ✔ | ✔ | ✔ | (Hadamard product) | xOMATCOPY | ✔ | ✔ | ✔ | ✔ | ✔ | (Out-of-place copying/transposing/scaling of matrices) | xIM2COL | ✔ | ✔ | ✔ | ✔ | ✔ | (Image to column transform as used to express convolution as GEMM) +| xCONVGEMM | ✔ | ✔ | - | - | ✔ | (Experimental, implemented as im2col followed by batched GEMM) -Some less commonly used BLAS routines are not yet supported yet by CLBlast. They are xROTG, xROTMG, xROT, xROTM, xTBSV, and xTPSV. +Some less commonly used BLAS routines are not yet supported by CLBlast. They are xROTG, xROTMG, xROT, xROTM, xTBSV, and xTPSV. Half precision (fp16) diff --git a/src/kernels/levelx/xconvgemm_part1.opencl b/src/kernels/levelx/xconvgemm_part1.opencl index 6f870ec0..abdb5324 100644 --- a/src/kernels/levelx/xconvgemm_part1.opencl +++ b/src/kernels/levelx/xconvgemm_part1.opencl @@ -11,6 +11,7 @@ // uses parameters from the direct GEMM kernel. This is the part with the loads from memory (1/2). // This uses "CONVGEMM_WITH_IM2COL" as a switch to select between direct convgemm or first running // the im2col kernel to create a 'col' temporary matrix. +// TODO: Currently only works with 'CONVGEMM_WITH_IM2COL' set // // ================================================================================================= diff --git a/src/kernels/levelx/xconvgemm_part2.opencl b/src/kernels/levelx/xconvgemm_part2.opencl index 46a72711..e0ac24a0 100644 --- a/src/kernels/levelx/xconvgemm_part2.opencl +++ b/src/kernels/levelx/xconvgemm_part2.opencl @@ -11,6 +11,7 @@ // uses parameters from the direct GEMM kernel. This part contains the main kernel (2/2). // This uses "CONVGEMM_WITH_IM2COL" as a switch to select between direct convgemm or first running // the im2col kernel to create a 'col' temporary matrix. +// TODO: Currently only works with 'CONVGEMM_WITH_IM2COL' set // // ================================================================================================= diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp index 5ad39751..f26f23a7 100644 --- a/src/routines/levelx/xconvgemm.cpp +++ b/src/routines/levelx/xconvgemm.cpp @@ -13,6 +13,7 @@ #include <string> #include <vector> +#include <assert.h> #include "routines/levelx/xconvgemm.hpp" #include "routines/levelx/xim2col.hpp" @@ -51,6 +52,9 @@ void Xconvgemm<T>::DoConvgemm(const size_t channels, const size_t height, const const Buffer<T> &kernel_buffer, const size_t kernel_offset, const Buffer<T> &result_buffer, const size_t result_offset) { + // TODO: Implement single-kernel approach + assert(method_ == ConvGemmMethod::kWithIm2Col); + // Tests for a valid batch count if (batch_count == 0) { throw BLASError(StatusCode::kInvalidBatchCount); diff --git a/src/routines/levelx/xconvgemm.hpp b/src/routines/levelx/xconvgemm.hpp index ac27657f..9d11ccee 100644 --- a/src/routines/levelx/xconvgemm.hpp +++ b/src/routines/levelx/xconvgemm.hpp @@ -29,7 +29,7 @@ class Xconvgemm: public Routine { // Constructor enum class ConvGemmMethod {kWithIm2Col, kSingleKernel}; Xconvgemm(Queue &queue, EventPointer event, const std::string &name = "CONVGEMM", - const ConvGemmMethod method = ConvGemmMethod::kSingleKernel); + const ConvGemmMethod method = ConvGemmMethod::kWithIm2Col); // Templated-precision implementation of the routine void DoConvgemm(const size_t channels, const size_t height, const size_t width, |