summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-03-05 10:38:38 +0100
committerCedric Nugteren <web@cedricnugteren.nl>2017-03-05 10:38:38 +0100
commitf9a520b3aff7b4eec99d9e11a03f9467e7ab351c (patch)
tree90612ac786448fa6e76681ecf6755f1c35c458a4 /include
parent37228c90988509acef9e8a892a752300b7645210 (diff)
Prepared generator for batched routines; added batched AXPY routine interface
Diffstat (limited to 'include')
-rw-r--r--include/clblast.h9
-rw-r--r--include/clblast_c.h32
2 files changed, 41 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 020f8e79..aeea4e52 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -609,6 +609,15 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event = nullptr);
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+template <typename T>
+StatusCode AxpyBatched(const size_t n,
+ const T *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
// =================================================================================================
// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 12d03f81..f933ef6c 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1327,6 +1327,38 @@ CLBlastStatusCode PUBLIC_API CLBlastHomatcopy(const CLBlastLayout layout, const
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event);
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+CLBlastStatusCode PUBLIC_API CLBlastSaxpyBatched(const size_t n,
+ const float *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastDaxpyBatched(const size_t n,
+ const double *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastCaxpyBatched(const size_t n,
+ const cl_float2 *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastZaxpyBatched(const size_t n,
+ const cl_double2 *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastHaxpyBatched(const size_t n,
+ const cl_half *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event);
+
// =================================================================================================
// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on