summary | refs | log | tree | commit | diff
path: root/include/clblast_cuda.h
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-05 14:06:33 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-05 14:06:33 +0200
commit: 2776d761768295b01a8be7c333dbb337805d7f77 (patch)
tree: 3fb955ea8e6d962e60dc6c8eba4b5e9800c3db91 /include/clblast_cuda.h
parent: 8b381480f8bd3b40cc030b07a599da10092b8117 (diff)
Added interface of batched convolution as GEMM
Diffstat (limited to 'include/clblast_cuda.h')
-rw-r--r-- include/clblast_cuda.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h
index d82ee331..01044037 100644
--- a/include/clblast_cuda.h
+++ b/include/clblast_cuda.h
@@ -608,6 +608,14 @@ StatusCode Im2col(const size_t channels, const size_t height, const size_t width
CUdeviceptr col_buffer, const size_t col_offset,
const CUcontext context, const CUdevice device);
+// Batched convolution as GEMM (non-BLAS function): SCONVGEMM/DCONVGEMM/CCONVGEMM/ZCONVGEMM/HCONVGEMM
+template <typename T> // T is the element type; the S/D/C/Z/H prefixes above presumably map to single/double/complex-single/complex-double/half instantiations (confirm against the explicit instantiations in the implementation file)
+StatusCode Convgemm(const size_t channels, const size_t height, const size_t width, const size_t kernel_h, const size_t kernel_w, const size_t pad_h, const size_t pad_w, const size_t stride_h, const size_t stride_w, const size_t dilation_h, const size_t dilation_w, const size_t num_kernels, const size_t batch_count, // Going by the names: image dimensions, kernel window size, padding, stride and dilation per axis (h/w), number of kernels, and number of images in the batch. Returns a StatusCode; the success/error values are not visible in this hunk.
+ const CUdeviceptr im_buffer, const size_t im_offset, // Device buffer handle for the input images plus a starting offset into it. NOTE(review): offset units (elements vs. bytes) and the memory layout are not shown here; confirm.
+ const CUdeviceptr kernel_buffer, const size_t kernel_offset, // Device buffer handle for the convolution kernels plus a starting offset; presumably holds num_kernels filters (verify the expected layout against the implementation).
+ CUdeviceptr result_buffer, const size_t result_offset, // Device buffer handle for the output plus a starting offset; the only buffer handle not declared const, so presumably the one written to.
+ const CUcontext context, const CUdevice device); // CUDA driver-API context and device the routine should run on; same trailing parameters as the Im2col declaration directly above.
+
// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
template <typename T>
StatusCode AxpyBatched(const size_t n,