summary | refs | log | tree | commit | diff
path: root/doc
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-03-05 10:38:38 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-03-05 10:38:38 +0100
commit: f9a520b3aff7b4eec99d9e11a03f9467e7ab351c (patch)
tree: 90612ac786448fa6e76681ecf6755f1c35c458a4 /doc
parent: 37228c90988509acef9e8a892a752300b7645210 (diff)
Prepared generator for batched routines; added batched AXPY routine interface
Diffstat (limited to 'doc')
-rw-r--r-- doc/clblast.md 66
1 files changed, 66 insertions, 0 deletions
diff --git a/doc/clblast.md b/doc/clblast.md
index 1d7c0df2..eda5c07f 100644
--- a/doc/clblast.md
+++ b/doc/clblast.md
@@ -2903,6 +2903,72 @@ Requirements for OMATCOPY:
+xAXPYBATCHED: Batched version of AXPY
+-------------
+
+As AXPY, but multiple operations are batched together for better performance.
+
+C++ API:
+```
+template <typename T>
+StatusCode AxpyBatched(const size_t n,
+ const T *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+```
+
+C API:
+```
+CLBlastStatusCode CLBlastSaxpyBatched(const size_t n,
+ const float *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastDaxpyBatched(const size_t n,
+ const double *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastCaxpyBatched(const size_t n,
+ const cl_float2 *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastZaxpyBatched(const size_t n,
+ const cl_double2 *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastHaxpyBatched(const size_t n,
+ const cl_half *alphas,
+ const cl_mem *x_buffers, const size_t x_offset, const size_t x_inc,
+ cl_mem *y_buffers, const size_t y_offset, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event)
+```
+
+Arguments to AXPYBATCHED:
+
+* `const size_t n`: Integer size argument. This value must be positive.
+* `const T *alphas`: Input scalar constants.
+* `const cl_mem *x_buffers`: OpenCL buffers to store the input x vectors.
+* `const size_t x_offset`: The offset in elements from the start of the input x vectors.
+* `const size_t x_inc`: Stride/increment of the input x vectors. This value must be greater than 0.
+* `cl_mem *y_buffers`: OpenCL buffers to store the output y vectors.
+* `const size_t y_offset`: The offset in elements from the start of the output y vectors.
+* `const size_t y_inc`: Stride/increment of the output y vectors. This value must be greater than 0.
+* `const size_t batch_count`: Number of batches. This value must be positive.
+* `cl_command_queue* queue`: Pointer to an OpenCL command queue associated with a context and device to execute the routine on.
+* `cl_event* event`: Pointer to an OpenCL event to be able to wait for completion of the routine's OpenCL kernel(s). This is an optional argument.
+
+
+
ClearCache: Resets the cache of compiled binaries (auxiliary function)
-------------