summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-01-04 19:33:51 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-01-04 19:33:51 +0100
commit 44431daecc63cc4ead3208327bcd70834b3f4bdb (patch)
tree 48aaac483856bbe6aa5ce54166d7adb8897e1aa5 /include
parent af14fff1e9f93daa535b673ad1391fac397b5edc (diff)
Added a CUDA version of the GEMM temp-buffer optional argument
Diffstat (limited to 'include')
-rw-r--r-- include/clblast_cuda.h 3
1 files changed, 2 insertions, 1 deletions
diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h
index e28f68e5..0f510981 100644
--- a/include/clblast_cuda.h
+++ b/include/clblast_cuda.h
@@ -492,7 +492,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
const CUdeviceptr b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
CUdeviceptr c_buffer, const size_t c_offset, const size_t c_ld,
- const CUcontext context, const CUdevice device);
+ const CUcontext context, const CUdevice device,
+ CUdeviceptr temp_buffer = nullptr);
// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM
template <typename T>