diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/clblast.h | 9 | ||||
-rw-r--r-- | include/clblast_c.h | 32 |
2 files changed, 41 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index 020f8e79..aeea4e52 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -609,6 +609,15 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
                     cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                     cl_command_queue* queue, cl_event* event = nullptr);
 
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+template <typename T>
+StatusCode AxpyBatched(const size_t n,
+                       const T *alphas,
+                       const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                       cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                       const size_t batch_count,
+                       cl_command_queue* queue, cl_event* event = nullptr);
+
 // =================================================================================================
 
 // CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 12d03f81..f933ef6c 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1327,6 +1327,38 @@ CLBlastStatusCode PUBLIC_API CLBlastHomatcopy(const CLBlastLayout layout, const
                                               cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                                               cl_command_queue* queue, cl_event* event);
 
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+CLBlastStatusCode PUBLIC_API CLBlastSaxpyBatched(const size_t n,
+                                                 const float *alphas,
+                                                 const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                                                 cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                                                 const size_t batch_count,
+                                                 cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastDaxpyBatched(const size_t n,
+                                                 const double *alphas,
+                                                 const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                                                 cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                                                 const size_t batch_count,
+                                                 cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastCaxpyBatched(const size_t n,
+                                                 const cl_float2 *alphas,
+                                                 const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                                                 cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                                                 const size_t batch_count,
+                                                 cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastZaxpyBatched(const size_t n,
+                                                 const cl_double2 *alphas,
+                                                 const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                                                 cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                                                 const size_t batch_count,
+                                                 cl_command_queue* queue, cl_event* event);
+CLBlastStatusCode PUBLIC_API CLBlastHaxpyBatched(const size_t n,
+                                                 const cl_half *alphas,
+                                                 const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+                                                 cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+                                                 const size_t batch_count,
+                                                 cl_command_queue* queue, cl_event* event);
+
 // =================================================================================================
 
 // CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on