From 6307d2e5db1347112d992b2ef7a6cde9b3441389 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Thu, 17 Sep 2015 10:14:33 +0200 Subject: Added script to generate API interface and implementation automatically --- test/wrapper_clblas.h | 1654 +++++++++++++++++++++++++------------------------ 1 file changed, 838 insertions(+), 816 deletions(-) (limited to 'test') diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index fcf1a918..85729470 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -20,216 +20,222 @@ #include "internal/utilities.h" namespace clblast { + // ================================================================================================= // BLAS level-1 (vector-vector) routines +// ================================================================================================= -// Calls {clblasSswap, clblasDswap, clblasCswap, clblasZswap} with the arguments forwarded. -template clblasStatus clblasXswap( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); -template <> clblasStatus clblasXswap( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSswap(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXswap( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDswap(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXswap( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasCswap(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXswap( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasZswap(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP +template +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSswap(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDswap(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCswap(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZswap(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } -// Calls {clblasSscal, clblasDscal, clblasCscal, clblasZscal} with the arguments forwarded. -clblasStatus clblasXscal( - size_t n, float alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSscal(n, alpha, - x_vec, x_offset, static_cast(x_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXscal( - size_t n, double alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDscal(n, alpha, - x_vec, x_offset, static_cast(x_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXscal( - size_t n, float2 alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - return clblasCscal(n, cl_alpha, - x_vec, x_offset, static_cast(x_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXscal( - size_t n, double2 alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - return clblasZscal(n, cl_alpha, - x_vec, x_offset, static_cast(x_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL +clblasStatus clblasXscal(const size_t n, + const float alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSscal(n, + alpha, + x_buffer, x_offset, static_cast(x_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXscal(const size_t n, + const double alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDscal(n, + alpha, + x_buffer, x_offset, static_cast(x_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXscal(const size_t n, + const float2 alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCscal(n, + cl_float2{{alpha.real(), alpha.imag()}}, + x_buffer, x_offset, static_cast(x_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXscal(const size_t n, + const double2 alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZscal(n, + cl_double2{{alpha.real(), alpha.imag()}}, + x_buffer, x_offset, static_cast(x_inc), + num_queues, queues, num_wait_events, wait_events, events); } -// Calls {clblasScopy, clblasDcopy, clblasCcopy, clblasZcopy} with the arguments forwarded. -template clblasStatus clblasXcopy( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); -template <> clblasStatus clblasXcopy( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasScopy(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXcopy( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDcopy(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXcopy( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasCcopy(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -template <> clblasStatus clblasXcopy( - size_t n, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasZcopy(n, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY +template +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasScopy(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDcopy(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCcopy(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZcopy(n, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } -// Calls {clblasSaxpy, clblasDaxpy, clblasCaxpy, clblasZaxpy} with the arguments forwarded. -clblasStatus clblasXaxpy( - size_t n, float alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSaxpy(n, alpha, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXaxpy( - size_t n, double alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDaxpy(n, alpha, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXaxpy( - size_t n, float2 alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - return clblasCaxpy(n, cl_alpha, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXaxpy( - size_t n, double2 alpha, - const cl_mem x_vec, size_t x_offset, size_t x_inc, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - return clblasZaxpy(n, cl_alpha, - x_vec, x_offset, static_cast(x_inc), - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY +clblasStatus clblasXaxpy(const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSaxpy(n, + alpha, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy(const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDaxpy(n, + alpha, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy(const size_t n, + const float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCaxpy(n, + cl_float2{{alpha.real(), alpha.imag()}}, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy(const size_t n, + const double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZaxpy(n, + cl_double2{{alpha.real(), alpha.imag()}}, + x_buffer, x_offset, static_cast(x_inc), + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SDOT/DDOT -template clblasStatus clblasXdot( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); -template <> clblasStatus clblasXdot( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template +clblasStatus clblasXdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -240,13 +246,13 @@ template <> clblasStatus clblasXdot( scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } -template <> clblasStatus clblasXdot( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template <> +clblasStatus clblasXdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -259,20 +265,20 @@ template <> clblasStatus clblasXdot( } // Forwards the clBLAS calls for CDOTU/ZDOTU -template clblasStatus clblasXdotu( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); -template <> clblasStatus clblasXdotu( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template +clblasStatus clblasXdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -283,13 +289,13 @@ template <> clblasStatus clblasXdotu( scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } -template <> clblasStatus clblasXdotu( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template <> +clblasStatus clblasXdotu(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -302,20 +308,20 @@ template <> clblasStatus clblasXdotu( } // Forwards the clBLAS calls for CDOTC/ZDOTC -template clblasStatus clblasXdotc( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); -template <> clblasStatus clblasXdotc( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template +clblasStatus clblasXdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -326,13 +332,13 @@ template <> clblasStatus clblasXdotc( scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } -template <> clblasStatus clblasXdotc( - const size_t n, - cl_mem dot_buffer, const size_t dot_offset, - const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { +template <> +clblasStatus clblasXdotc(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto queue = Queue(queues[0]); auto context = queue.GetContext(); auto scratch_buffer = Buffer(context, n); @@ -346,600 +352,616 @@ template <> clblasStatus clblasXdotc( // ================================================================================================= // BLAS level-2 (matrix-vector) routines +// ================================================================================================= -// Calls {clblasSgemv, clblasDgemv, clblasCgemv, clblasZgemv} with the arguments forwarded. -clblasStatus clblasXgemv( - clblasOrder layout, clblasTranspose a_transpose, size_t m, size_t n, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, float beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSgemv(layout, a_transpose, m, n, alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemv( - clblasOrder layout, clblasTranspose a_transpose, size_t m, size_t n, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, double beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDgemv(layout, a_transpose, m, n, alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemv( - clblasOrder layout, clblasTranspose a_transpose, size_t m, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, float2 beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasCgemv(layout, a_transpose, m, n, cl_alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), cl_beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemv( - clblasOrder layout, clblasTranspose a_transpose, size_t m, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, double2 beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZgemv(layout, a_transpose, m, n, cl_alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), cl_beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV +clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSgemv(layout, a_transpose, + m, n, + alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + beta, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDgemv(layout, a_transpose, + m, n, + alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + beta, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCgemv(layout, a_transpose, + m, n, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + cl_float2{{beta.real(), beta.imag()}}, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZgemv(layout, a_transpose, + m, n, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + cl_double2{{beta.real(), beta.imag()}}, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } -// Calls {clblasChemv, clblasZhemv} with the arguments forwarded. -clblasStatus clblasXhemv( - clblasOrder layout, clblasUplo triangle, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, float2 beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasChemv(layout, triangle, n, cl_alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), cl_beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXhemv( - clblasOrder layout, clblasUplo triangle, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, double2 beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZhemv(layout, triangle, n, cl_alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), cl_beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for CHEMV/ZHEMV +clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasChemv(layout, triangle, + n, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + cl_float2{{beta.real(), beta.imag()}}, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZhemv(layout, triangle, + n, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + cl_double2{{beta.real(), beta.imag()}}, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } -// Calls {clblasSsymv, clblasDsymv} with the arguments forwarded. -clblasStatus clblasXsymv( - clblasOrder layout, clblasUplo triangle, size_t n, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, float beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSsymv(layout, triangle, n, alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsymv( - clblasOrder layout, clblasUplo triangle, size_t n, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem x_vec, size_t x_offset, size_t x_inc, double beta, - const cl_mem y_vec, size_t y_offset, size_t y_inc, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDsymv(layout, triangle, n, alpha, - a_mat, a_offset, a_ld, - x_vec, x_offset, static_cast(x_inc), beta, - y_vec, y_offset, static_cast(y_inc), - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSYMV/DSYMV +clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsymv(layout, triangle, + n, + alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + beta, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsymv(layout, triangle, + n, + alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, static_cast(x_inc), + beta, + y_buffer, y_offset, static_cast(y_inc), + num_queues, queues, num_wait_events, wait_events, events); } // ================================================================================================= // BLAS level-3 (matrix-matrix) routines +// ================================================================================================= -// This calls {clblasSgemm, clblasDgemm, clblasCgemm, clblasZgemm} with the arguments forwarded. -clblasStatus clblasXgemm( - clblasOrder layout, clblasTranspose a_transpose, clblasTranspose b_transpose, - size_t m, size_t n, size_t k, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSgemm(layout, a_transpose, b_transpose, - m, n, k, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemm( - clblasOrder layout, clblasTranspose a_transpose, clblasTranspose b_transpose, - size_t m, size_t n, size_t k, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDgemm(layout, a_transpose, b_transpose, - m, n, k, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemm( - clblasOrder layout, clblasTranspose a_transpose, clblasTranspose b_transpose, - size_t m, size_t n, size_t k, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasCgemm(layout, a_transpose, b_transpose, - m, n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXgemm( - clblasOrder layout, clblasTranspose a_transpose, clblasTranspose b_transpose, - size_t m, size_t n, size_t k, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZgemm(layout, a_transpose, b_transpose, - m, n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM +clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSgemm(layout, a_transpose, b_transpose, + m, n, k, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } - -// This calls {clblasSsymm, clblasDsymm, clblasCsymm, clblasZsymm} with the arguments forwarded. -clblasStatus clblasXsymm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSsymm(layout, side, triangle, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsymm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDsymm(layout, side, triangle, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsymm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasCsymm(layout, side, triangle, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsymm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZsymm(layout, side, triangle, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDgemm(layout, a_transpose, b_transpose, + m, n, k, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCgemm(layout, a_transpose, b_transpose, + m, n, k, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_float2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZgemm(layout, a_transpose, b_transpose, + m, n, k, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_double2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasChemm, clblasZhemm} with the arguments forwarded. -clblasStatus clblasXhemm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasChemm(layout, side, triangle, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXhemm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - size_t m, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZhemm(layout, side, triangle, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM +clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsymm(layout, side, triangle, + m, n, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsymm(layout, side, triangle, + m, n, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCsymm(layout, side, triangle, + m, n, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_float2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZsymm(layout, side, triangle, + m, n, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_double2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasSsyrk, clblasDsyrk, clblasCsyrk, clblasZsyrk} with the arguments forwarded. -clblasStatus clblasXsyrk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSsyrk(layout, triangle, a_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyrk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDsyrk(layout, triangle, a_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyrk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, float2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasCsyrk(layout, triangle, a_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyrk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, double2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZsyrk(layout, triangle, a_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for CHEMM/ZHEMM +clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasChemm(layout, side, triangle, + m, n, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_float2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZhemm(layout, side, triangle, + m, n, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_double2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasCherk, clblasZherk} with the arguments forwarded. -clblasStatus clblasXherk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasCherk(layout, triangle, a_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXherk( - clblasOrder layout, clblasUplo triangle, clblasTranspose a_transpose, - size_t n, size_t k, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasZherk(layout, triangle, a_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK +clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsyrk(layout, triangle, a_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsyrk(layout, triangle, a_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCsyrk(layout, triangle, a_transpose, + n, k, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + cl_float2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZsyrk(layout, triangle, a_transpose, + n, k, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + cl_double2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasSsyr2k, clblasDsyr2k, clblasCsyr2k, clblasZsyr2k} with the arguments forwarded. -clblasStatus clblasXsyr2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasSsyr2k(layout, triangle, ab_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyr2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDsyr2k(layout, triangle, ab_transpose, - n, k, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyr2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; - return clblasCsyr2k(layout, triangle, ab_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXsyr2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; - return clblasZsyr2k(layout, triangle, ab_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, cl_beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for CHERK/ZHERK +clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCherk(layout, triangle, a_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZherk(layout, triangle, a_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasCher2k, clblasZher2k} with the arguments forwarded. -clblasStatus clblasXher2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - return clblasCher2k(layout, triangle, ab_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXher2k( - clblasOrder layout, clblasUplo triangle, clblasTranspose ab_transpose, - size_t n, size_t k, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, - const cl_mem c_mat, size_t c_offset, size_t c_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - return clblasZher2k(layout, triangle, ab_transpose, - n, k, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, beta, - c_mat, c_offset, c_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K +clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsyr2k(layout, triangle, ab_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsyr2k(layout, triangle, ab_transpose, + n, k, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCsyr2k(layout, triangle, ab_transpose, + n, k, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_float2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZsyr2k(layout, triangle, ab_transpose, + n, k, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + cl_double2{{beta.real(), beta.imag()}}, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasStrmm, clblasDtrmm, clblasCtrmm, clblasZtrmm} with the arguments forwarded. -clblasStatus clblasXtrmm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasStrmm(layout, side, triangle, a_transpose, diagonal, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrmm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDtrmm(layout, side, triangle, a_transpose, diagonal, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrmm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - return clblasCtrmm(layout, side, triangle, a_transpose, diagonal, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrmm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - return clblasZtrmm(layout, side, triangle, a_transpose, diagonal, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for CHER2K/ZHER2K +clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCher2k(layout, triangle, ab_transpose, + n, k, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZher2k(layout, triangle, ab_transpose, + n, k, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + beta, + c_buffer, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); } -// This calls {clblasStrsm, clblasDtrsm, clblasCtrsm, clblasZtrsm} with the arguments forwarded. -clblasStatus clblasXtrsm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, float alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasStrsm(layout, side, triangle, a_transpose, diagonal, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrsm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, double alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - return clblasDtrsm(layout, side, triangle, a_transpose, diagonal, - m, n, alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrsm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, float2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; - return clblasCtrsm(layout, side, triangle, a_transpose, diagonal, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); -} -clblasStatus clblasXtrsm( - clblasOrder layout, clblasSide side, clblasUplo triangle, - clblasTranspose a_transpose, clblasDiag diagonal, - size_t m, size_t n, double2 alpha, - const cl_mem a_mat, size_t a_offset, size_t a_ld, - const cl_mem b_mat, size_t b_offset, size_t b_ld, - cl_uint num_queues, cl_command_queue *queues, - cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { - auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; - return clblasZtrsm(layout, side, triangle, a_transpose, diagonal, - m, n, cl_alpha, - a_mat, a_offset, a_ld, - b_mat, b_offset, b_ld, - num_queues, queues, num_wait_events, wait_events, events); +// Forwards the clBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM +clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasStrmm(layout, side, triangle, a_transpose, diagonal, + m, n, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDtrmm(layout, side, triangle, a_transpose, diagonal, + m, n, + alpha, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasCtrmm(layout, side, triangle, a_transpose, diagonal, + m, n, + cl_float2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasZtrmm(layout, side, triangle, a_transpose, diagonal, + m, n, + cl_double2{{alpha.real(), alpha.imag()}}, + a_buffer, a_offset, a_ld, + b_buffer, b_offset, b_ld, + num_queues, queues, num_wait_events, wait_events, events); } // ================================================================================================= -- cgit v1.2.3