// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file implements a wrapper around the clBLAS library, such that its routines can be called // in a similar way as the CLBlast routines: using alpha and beta to determine the precision. // // ================================================================================================= #ifndef CLBLAST_TEST_WRAPPER_CLBLAS_H_ #define CLBLAST_TEST_WRAPPER_CLBLAS_H_ #include #include "utilities/utilities.hpp" namespace clblast { // Conversions from CLBlast types clblasOrder convertToCLBLAS(const Layout v) { return (v == Layout::kRowMajor) ? clblasRowMajor : clblasColumnMajor; } clblasTranspose convertToCLBLAS(const Transpose v) { return (v == Transpose::kNo) ? clblasNoTrans : (v == Transpose::kYes) ? clblasTrans : clblasConjTrans; } clblasUplo convertToCLBLAS(const Triangle v) { return (v == Triangle::kUpper) ? clblasUpper : clblasLower; } clblasDiag convertToCLBLAS(const Diagonal v) { return (v == Diagonal::kUnit) ? clblasUnit : clblasNonUnit; } clblasSide convertToCLBLAS(const Side v) { return (v == Side::kLeft) ? 
clblasLeft : clblasRight; } // ================================================================================================= // BLAS level-1 (vector-vector) routines // ================================================================================================= // Forwards the clBLAS calls for SROTG/DROTG template clblasStatus clblasXrotg(Buffer& sa_buffer, const size_t sa_offset, Buffer& sb_buffer, const size_t sb_offset, Buffer& sc_buffer, const size_t sc_offset, Buffer& ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); template <> clblasStatus clblasXrotg(Buffer& sa_buffer, const size_t sa_offset, Buffer& sb_buffer, const size_t sb_offset, Buffer& sc_buffer, const size_t sc_offset, Buffer& ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSrotg(sa_buffer(), sa_offset, sb_buffer(), sb_offset, sc_buffer(), sc_offset, ss_buffer(), ss_offset, num_queues, queues, num_wait_events, wait_events, events); } template <> clblasStatus clblasXrotg(Buffer& sa_buffer, const size_t sa_offset, Buffer& sb_buffer, const size_t sb_offset, Buffer& sc_buffer, const size_t sc_offset, Buffer& ss_buffer, const size_t ss_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDrotg(sa_buffer(), sa_offset, sb_buffer(), sb_offset, sc_buffer(), sc_offset, ss_buffer(), ss_offset, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SROTMG/DROTMG template clblasStatus clblasXrotmg(Buffer& sd1_buffer, const size_t sd1_offset, Buffer& sd2_buffer, const size_t sd2_offset, Buffer& sx1_buffer, const size_t sx1_offset, const Buffer& sy1_buffer, const size_t sy1_offset, Buffer& sparam_buffer, const size_t sparam_offset, cl_uint num_queues, 
cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSrotmg
template <>
clblasStatus clblasXrotmg<float>(
    Buffer<float>& sd1_buffer, const size_t sd1_offset,
    Buffer<float>& sd2_buffer, const size_t sd2_offset,
    Buffer<float>& sx1_buffer, const size_t sx1_offset,
    const Buffer<float>& sy1_buffer, const size_t sy1_offset,
    Buffer<float>& sparam_buffer, const size_t sparam_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSrotmg(sd1_buffer(), sd1_offset,
                      sd2_buffer(), sd2_offset,
                      sx1_buffer(), sx1_offset,
                      sy1_buffer(), sy1_offset,
                      sparam_buffer(), sparam_offset,
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDrotmg
template <>
clblasStatus clblasXrotmg<double>(
    Buffer<double>& sd1_buffer, const size_t sd1_offset,
    Buffer<double>& sd2_buffer, const size_t sd2_offset,
    Buffer<double>& sx1_buffer, const size_t sx1_offset,
    const Buffer<double>& sy1_buffer, const size_t sy1_offset,
    Buffer<double>& sparam_buffer, const size_t sparam_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDrotmg(sd1_buffer(), sd1_offset,
                      sd2_buffer(), sd2_offset,
                      sx1_buffer(), sx1_offset,
                      sy1_buffer(), sy1_offset,
                      sparam_buffer(), sparam_offset,
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for SROT/DROT
// These are plain overloads: the type of cos/sin selects the precision. The size_t increments are
// narrowed to int because that is the increment type of the clBLAS interface.
clblasStatus clblasXrot(
    const size_t n,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    const float cos,
    const float sin,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSrot(n,
                    x_buffer(), x_offset, static_cast<int>(x_inc),
                    y_buffer(), y_offset, static_cast<int>(y_inc),
                    cos,
                    sin,
                    num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDrot
clblasStatus clblasXrot(
    const size_t n,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double>& y_buffer, const size_t y_offset,
    const size_t y_inc,
    const double cos,
    const double sin,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDrot(n,
                    x_buffer(), x_offset, static_cast<int>(x_inc),
                    y_buffer(), y_offset, static_cast<int>(y_inc),
                    cos,
                    sin,
                    num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for SROTM/DROTM
// Declared as a template with one explicit specialisation per supported precision.
template <typename T>
clblasStatus clblasXrotm(
    const size_t n,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    Buffer<T>& sparam_buffer, const size_t sparam_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSrotm
template <>
clblasStatus clblasXrotm<float>(
    const size_t n,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    Buffer<float>& sparam_buffer, const size_t sparam_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSrotm(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     sparam_buffer(), sparam_offset,
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDrotm
template <>
clblasStatus clblasXrotm<double>(
    const size_t n,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    Buffer<double>& sparam_buffer, const size_t sparam_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDrotm(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     sparam_buffer(), sparam_offset,
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
// Declared as a template with one explicit specialisation per supported precision.
template <typename T>
clblasStatus clblasXswap(
    const size_t n,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSswap
template <>
clblasStatus clblasXswap<float>(
    const size_t n,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSswap(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDswap
template <>
clblasStatus clblasXswap<double>(
    const size_t n,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDswap(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCswap
template <>
clblasStatus clblasXswap<float2>(
    const size_t n,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasCswap(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZswap
template <>
clblasStatus clblasXswap<double2>(
    const size_t n,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasZswap(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues,
                     num_wait_events, wait_events, events);
}

// Half precision: there is no half-precision clBLAS call in this wrapper, so both vectors are
// converted to single precision on the first queue, the float specialisation is run, and both
// results are converted back because the routine modifies x as well as y.
template <>
clblasStatus clblasXswap<half>(
    const size_t n,
    Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXswap<float>(n,
                                   x_buffer_bis, x_offset, x_inc,
                                   y_buffer_bis, y_offset, y_inc,
                                   num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
// These are plain overloads: the type of alpha selects the precision.
clblasStatus clblasXscal(
    const size_t n,
    const float alpha,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSscal(n,
                     alpha,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDscal
clblasStatus clblasXscal(
    const size_t n,
    const double alpha,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDscal(n,
                     alpha,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: alpha is repacked into a cl_float2 from its real and imaginary parts
clblasStatus clblasXscal(
    const size_t n,
    const float2 alpha,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasCscal(n,
                     cl_float2{{alpha.real(), alpha.imag()}},
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: alpha is repacked into a cl_double2 from its real and imaginary parts
clblasStatus clblasXscal(
    const size_t n,
    const double2
alpha,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasZscal(n,
                     cl_double2{{alpha.real(), alpha.imag()}},
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: x is converted to single precision on the first queue, scaled with the float
// overload (alpha converted through HalfToFloat), and the result is converted back into x.
clblasStatus clblasXscal(
    const size_t n,
    const half alpha,
    Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto status = clblasXscal(n,
                            HalfToFloat(alpha),
                            x_buffer_bis, x_offset, x_inc,
                            num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
// Declared as a template with one explicit specialisation per supported precision. The source
// vector x is taken by const reference, the destination y by mutable reference.
template <typename T>
clblasStatus clblasXcopy(
    const size_t n,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasScopy
template <>
clblasStatus clblasXcopy<float>(
    const size_t n,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasScopy(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDcopy
template <>
clblasStatus clblasXcopy<double>(
    const size_t n,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return
      clblasDcopy(n,
                  x_buffer(), x_offset, static_cast<int>(x_inc),
                  y_buffer(), y_offset, static_cast<int>(y_inc),
                  num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCcopy
template <>
clblasStatus clblasXcopy<float2>(
    const size_t n,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasCcopy(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZcopy
template <>
clblasStatus clblasXcopy<double2>(
    const size_t n,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasZcopy(n,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: both vectors are converted to single precision on the first queue, the float
// specialisation is run, and only y (the output) is converted back.
template <>
clblasStatus clblasXcopy<half>(
    const size_t n,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXcopy<float>(n,
                                   x_buffer_bis, x_offset, x_inc,
                                   y_buffer_bis, y_offset, y_inc,
                                   num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
// These are plain overloads: the type of alpha selects the precision.
clblasStatus clblasXaxpy(
    const size_t n,
    const float alpha,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint
num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSaxpy(n,
                     alpha,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDaxpy
clblasStatus clblasXaxpy(
    const size_t n,
    const double alpha,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDaxpy(n,
                     alpha,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: alpha is repacked into a cl_float2 from its real and imaginary parts
clblasStatus clblasXaxpy(
    const size_t n,
    const float2 alpha,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasCaxpy(n,
                     cl_float2{{alpha.real(), alpha.imag()}},
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: alpha is repacked into a cl_double2 from its real and imaginary parts
clblasStatus clblasXaxpy(
    const size_t n,
    const double2 alpha,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasZaxpy(n,
                     cl_double2{{alpha.real(), alpha.imag()}},
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: both vectors are converted to single precision on the first queue, the float
// overload is run (alpha converted through HalfToFloat), and only y is converted back.
clblasStatus clblasXaxpy(
    const size_t n,
    const half alpha,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint
    num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXaxpy(n,
                            HalfToFloat(alpha),
                            x_buffer_bis, x_offset, x_inc,
                            y_buffer_bis, y_offset, y_inc,
                            num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SDOT/DDOT
// Declared as a template with one explicit specialisation per supported precision. Each
// specialisation creates an n-element scratch buffer in the context of the first queue and hands
// it to clBLAS as an extra argument.
// NOTE(review): the scratch element type is assumed to equal the routine's precision - confirm.
template <typename T>
clblasStatus clblasXdot(
    const size_t n,
    Buffer<T>& dot_buffer, const size_t dot_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSdot
template <>
clblasStatus clblasXdot<float>(
    const size_t n,
    Buffer<float>& dot_buffer, const size_t dot_offset,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, n);
  return clblasSdot(n,
                    dot_buffer(), dot_offset,
                    x_buffer(), x_offset, static_cast<int>(x_inc),
                    y_buffer(), y_offset, static_cast<int>(y_inc),
                    scratch_buffer(),
                    num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDdot
template <>
clblasStatus clblasXdot<double>(
    const size_t n,
    Buffer<double>& dot_buffer, const size_t dot_offset,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, n);
  return
clblasDdot(n,
           dot_buffer(), dot_offset,
           x_buffer(), x_offset, static_cast<int>(x_inc),
           y_buffer(), y_offset, static_cast<int>(y_inc),
           scratch_buffer(),
           num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: x, y and the result buffer are converted to single precision on the first
// queue, the float specialisation is run, and only the result buffer is converted back.
template <>
clblasStatus clblasXdot<half>(
    const size_t n,
    Buffer<half>& dot_buffer, const size_t dot_offset,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto dot_buffer_bis = HalfToFloatBuffer(dot_buffer, queues[0]);
  auto status = clblasXdot<float>(n,
                                  dot_buffer_bis, dot_offset,
                                  x_buffer_bis, x_offset, x_inc,
                                  y_buffer_bis, y_offset, y_inc,
                                  num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(dot_buffer, dot_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for CDOTU/ZDOTU
// Declared as a template with explicit specialisations for the two complex precisions. Each
// specialisation creates an n-element scratch buffer in the context of the first queue.
// NOTE(review): the scratch element type is assumed to equal the routine's precision - confirm.
template <typename T>
clblasStatus clblasXdotu(
    const size_t n,
    Buffer<T>& dot_buffer, const size_t dot_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision complex: forwards to clblasCdotu
template <>
clblasStatus clblasXdotu<float2>(
    const size_t n,
    Buffer<float2>& dot_buffer, const size_t dot_offset,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasCdotu(n,
                     dot_buffer(), dot_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZdotu
template <>
clblasStatus clblasXdotu<double2>(
    const size_t n,
    Buffer<double2>& dot_buffer, const size_t dot_offset,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasZdotu(n,
                     dot_buffer(), dot_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for CDOTC/ZDOTC
// Declared as a template with explicit specialisations for the two complex precisions. Each
// specialisation creates an n-element scratch buffer in the context of the first queue.
// NOTE(review): the scratch element type is assumed to equal the routine's precision - confirm.
template <typename T>
clblasStatus clblasXdotc(
    const size_t n,
    Buffer<T>& dot_buffer, const size_t dot_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision complex: forwards to clblasCdotc
template <>
clblasStatus clblasXdotc<float2>(
    const size_t n,
    Buffer<float2>& dot_buffer, const size_t dot_offset,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasCdotc(n,
                     dot_buffer(), dot_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZdotc
template <>
clblasStatus clblasXdotc<double2>(
    const size_t n,
    Buffer<double2>& dot_buffer, const size_t dot_offset,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    const Buffer<double2>& y_buffer, const size_t
y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasZdotc(n,
                     dot_buffer(), dot_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2
// Declared as a template with one explicit specialisation per supported precision. Each
// non-half specialisation creates a scratch buffer of 2*n elements in the context of the first
// queue and hands it to clBLAS as an extra argument.
// NOTE(review): the scratch element type is assumed to equal the input precision - confirm.
template <typename T>
clblasStatus clblasXnrm2(
    const size_t n,
    Buffer<T>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSnrm2
template <>
clblasStatus clblasXnrm2<float>(
    const size_t n,
    Buffer<float>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, 2*n);
  return clblasSnrm2(n,
                     nrm2_buffer(), nrm2_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDnrm2
template <>
clblasStatus clblasXnrm2<double>(
    const size_t n,
    Buffer<double>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, 2*n);
  return clblasDnrm2(n,
                     nrm2_buffer(), nrm2_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasScnrm2
template <>
clblasStatus clblasXnrm2<float2>(
    const size_t n,
    Buffer<float2>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, 2*n);
  return clblasScnrm2(n,
                      nrm2_buffer(), nrm2_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasDznrm2
template <>
clblasStatus clblasXnrm2<double2>(
    const size_t n,
    Buffer<double2>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, 2*n);
  return clblasDznrm2(n,
                      nrm2_buffer(), nrm2_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: x and the result buffer are converted to single precision on the first queue,
// the float specialisation is run, and only the result buffer is converted back.
template <>
clblasStatus clblasXnrm2<half>(
    const size_t n,
    Buffer<half>& nrm2_buffer, const size_t nrm2_offset,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto nrm2_buffer_bis = HalfToFloatBuffer(nrm2_buffer, queues[0]);
  auto status = clblasXnrm2<float>(n,
                                   nrm2_buffer_bis, nrm2_offset,
                                   x_buffer_bis, x_offset, x_inc,
                                   num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(nrm2_buffer, nrm2_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
// Declared as a template with one explicit specialisation per supported precision.
template <typename T>
clblasStatus clblasXasum(
    const size_t n,
    Buffer<T>& asum_buffer, const size_t asum_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasSasum with an n-element scratch buffer created in the
// context of the first queue.
// NOTE(review): the scratch element type of all asum variants is assumed to equal the input
// precision - confirm.
template <>
clblasStatus clblasXasum<float>(
    const size_t n,
    Buffer<float>& asum_buffer, const size_t asum_offset,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, n);
  return clblasSasum(n,
                     asum_buffer(), asum_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDasum
template <>
clblasStatus clblasXasum<double>(
    const size_t n,
    Buffer<double>& asum_buffer, const size_t asum_offset,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, n);
  return clblasDasum(n,
                     asum_buffer(), asum_offset,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     scratch_buffer(),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasScasum
template <>
clblasStatus clblasXasum<float2>(
    const size_t n,
    Buffer<float2>& asum_buffer, const size_t asum_offset,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasScasum(n,
                      asum_buffer(), asum_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasDzasum
template <>
clblasStatus clblasXasum<double2>(
    const size_t n,
    Buffer<double2>& asum_buffer, const size_t asum_offset,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint
    num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasDzasum(n,
                      asum_buffer(), asum_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: x and the result buffer are converted to single precision on the first queue,
// the float specialisation is run, and only the result buffer is converted back.
template <>
clblasStatus clblasXasum<half>(
    const size_t n,
    Buffer<half>& asum_buffer, const size_t asum_offset,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto asum_buffer_bis = HalfToFloatBuffer(asum_buffer, queues[0]);
  auto status = clblasXasum<float>(n,
                                   asum_buffer_bis, asum_offset,
                                   x_buffer_bis, x_offset, x_inc,
                                   num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(asum_buffer, asum_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
// Declared as a template with one explicit specialisation per supported precision. Each non-half
// specialisation creates a scratch buffer of 2*n elements in the context of the first queue.
// NOTE(review): the index buffer is restored as Buffer<T> because the half specialisation passes
// it to HalfToFloatBuffer; the scratch element type is assumed to equal the input precision -
// confirm both.
template <typename T>
clblasStatus clblasXamax(
    const size_t n,
    Buffer<T>& imax_buffer, const size_t imax_offset,
    const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single precision: forwards to clblasiSamax
template <>
clblasStatus clblasXamax<float>(
    const size_t n,
    Buffer<float>& imax_buffer, const size_t imax_offset,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, 2*n);
  return clblasiSamax(n,
                      imax_buffer(), imax_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasiDamax
template <>
clblasStatus clblasXamax<double>(
    const size_t n,
    Buffer<double>& imax_buffer, const size_t
imax_offset,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  // NOTE(review): scratch element type assumed to equal the input precision - confirm
  auto scratch_buffer = Buffer<double>(context, 2*n);
  return clblasiDamax(n,
                      imax_buffer(), imax_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasiCamax
template <>
clblasStatus clblasXamax<float2>(
    const size_t n,
    Buffer<float2>& imax_buffer, const size_t imax_offset,
    const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, 2*n);
  return clblasiCamax(n,
                      imax_buffer(), imax_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasiZamax
template <>
clblasStatus clblasXamax<double2>(
    const size_t n,
    Buffer<double2>& imax_buffer, const size_t imax_offset,
    const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, 2*n);
  return clblasiZamax(n,
                      imax_buffer(), imax_offset,
                      x_buffer(), x_offset, static_cast<int>(x_inc),
                      scratch_buffer(),
                      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: x and the index buffer are converted to single precision on the first queue,
// the float specialisation is run, and only the index buffer is converted back.
template <>
clblasStatus clblasXamax<half>(
    const size_t n,
    Buffer<half>& imax_buffer, const size_t imax_offset,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto imax_buffer_bis =
      HalfToFloatBuffer(imax_buffer, queues[0]);
  auto status = clblasXamax<float>(n,
                                   imax_buffer_bis, imax_offset,
                                   x_buffer_bis, x_offset, x_inc,
                                   num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(imax_buffer, imax_buffer_bis, queues[0]);
  return status;
}

// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================

// Forwards the clBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV
// These are plain overloads: the type of alpha/beta selects the precision. The vector increments
// are narrowed to int, while the leading dimension a_ld is passed through as size_t.
clblasStatus clblasXgemv(
    const clblasOrder layout, const clblasTranspose a_transpose,
    const size_t m, const size_t n,
    const float alpha,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    const float beta,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSgemv(layout, a_transpose,
                     m, n,
                     alpha,
                     a_buffer(), a_offset, a_ld,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     beta,
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Double precision: forwards to clblasDgemv
clblasStatus clblasXgemv(
    const clblasOrder layout, const clblasTranspose a_transpose,
    const size_t m, const size_t n,
    const double alpha,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    const double beta,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDgemv(layout, a_transpose,
                     m, n,
                     alpha,
                     a_buffer(), a_offset, a_ld,
                     x_buffer(), x_offset, static_cast<int>(x_inc),
                     beta,
                     y_buffer(), y_offset, static_cast<int>(y_inc),
                     num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: alpha and beta are repacked into cl_float2 values
clblasStatus
clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCgemv(layout, a_transpose, m, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_float2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZgemv(layout, a_transpose, m, n, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_double2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const half beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { 
auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]); auto status = clblasXgemv(layout, a_transpose, m, n, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, x_buffer_bis, x_offset, x_inc, HalfToFloat(beta), y_buffer_bis, y_offset, y_inc, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSgbmv(layout, a_transpose, m, n, kl, ku, alpha, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), beta, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDgbmv(layout, a_transpose, m, n, kl, ku, alpha, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), beta, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, 
num_wait_events, wait_events, events); } clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCgbmv(layout, a_transpose, m, n, kl, ku, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_float2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZgbmv(layout, a_transpose, m, n, kl, ku, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_double2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const half beta, Buffer& 
y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]); auto status = clblasXgbmv(layout, a_transpose, m, n, kl, ku, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, x_buffer_bis, x_offset, x_inc, HalfToFloat(beta), y_buffer_bis, y_offset, y_inc, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for CHEMV/ZHEMV clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChemv(layout, triangle, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_float2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhemv(layout, triangle, n, cl_double2{{alpha.real(), alpha.imag()}}, 
a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_double2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHBMV/ZHBMV clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const size_t k, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChbmv(layout, triangle, n, k, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_float2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const size_t k, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhbmv(layout, triangle, n, k, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), cl_double2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHPMV/ZHPMV clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float2 alpha, const Buffer& ap_buffer, const size_t ap_offset, const 
Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChpmv(layout, triangle, n, cl_float2{{alpha.real(), alpha.imag()}}, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), cl_float2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double2 alpha, const Buffer& ap_buffer, const size_t ap_offset, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhpmv(layout, triangle, n, cl_double2{{alpha.real(), alpha.imag()}}, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), cl_double2{{beta.real(), beta.imag()}}, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SSYMV/DSYMV clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const float beta, Buffer& y_buffer, const size_t y_offset, const size_t y_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsymv(layout, triangle, n, alpha, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), beta, y_buffer(), y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } 
// Notes that hold for every wrapper below:
//  - 'num_queues', 'queues', 'num_wait_events', 'wait_events' and 'events' are handed to clBLAS
//    unchanged, and the clblasStatus returned by clBLAS is returned to the caller.
//  - Buffer arguments are passed to clBLAS through their call operator ('buffer()').
//  - Vector increments arrive as size_t and are cast to int for the clBLAS call.
//  - The half-precision variants never call clBLAS with half data: they convert their buffers with
//    HalfToFloatBuffer (using queues[0]), call the single-precision wrapper on those buffers, and
//    convert the output buffer back with FloatToHalfBuffer.

// Double-precision real SYMV: forwards to clblasDsymv.
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const double alpha,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    const double beta,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDsymv(layout, triangle,
      n,
      alpha,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      beta,
      y_buffer(), y_offset, static_cast<int>(y_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half-precision SYMV: runs the float overload on converted buffers (alpha/beta go through
// HalfToFloat) and writes 'y_buffer' back.
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXsymv(layout, triangle,
      n,
      HalfToFloat(alpha),
      a_buffer_bis, a_offset, a_ld,
      x_buffer_bis, x_offset, x_inc,
      HalfToFloat(beta),
      y_buffer_bis, y_offset, y_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SSBMV/DSBMV

// Single-precision real: forwards to clblasSsbmv.
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n, const size_t k,
    const float alpha,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    const float beta,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSsbmv(layout, triangle,
      n, k,
      alpha,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      beta,
      y_buffer(), y_offset, static_cast<int>(y_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDsbmv.
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n, const size_t k,
    const double alpha,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    const double beta,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDsbmv(layout, triangle,
      n, k,
      alpha,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      beta,
      y_buffer(), y_offset, static_cast<int>(y_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: runs the float overload on converted buffers and writes 'y_buffer' back.
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n, const size_t k,
    const half alpha,
    const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXsbmv(layout, triangle,
      n, k,
      HalfToFloat(alpha),
      a_buffer_bis, a_offset, a_ld,
      x_buffer_bis, x_offset, x_inc,
      HalfToFloat(beta),
      y_buffer_bis, y_offset, y_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for SSPMV/DSPMV
// The matrix is passed as 'ap_buffer' with an offset only (no leading dimension argument).

// Single-precision real: forwards to clblasSspmv.
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const float alpha,
    const Buffer<float>& ap_buffer, const size_t ap_offset,
    const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    const float beta,
    Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasSspmv(layout, triangle,
      n,
      alpha,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      beta,
      y_buffer(), y_offset, static_cast<int>(y_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDspmv.
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const double alpha,
    const Buffer<double>& ap_buffer, const size_t ap_offset,
    const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    const double beta,
    Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDspmv(layout, triangle,
      n,
      alpha,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      beta,
      y_buffer(), y_offset, static_cast<int>(y_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: runs the float overload on converted buffers and writes 'y_buffer' back.
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const Buffer<half>& ap_buffer, const size_t ap_offset,
    const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
  auto status = clblasXspmv(layout, triangle,
      n,
      HalfToFloat(alpha),
      ap_buffer_bis, ap_offset,
      x_buffer_bis, x_offset, x_inc,
      HalfToFloat(beta),
      y_buffer_bis, y_offset, y_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
// Only the explicit specializations below are defined. Each non-half specialization creates a
// scratch buffer of n elements on the context of queues[0] and passes it to clBLAS.
template <typename T>
clblasStatus clblasXtrmv(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision real: forwards to clblasStrmv.
template <>
clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, n);
  return clblasStrmv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDtrmv.
template <>
clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, n);
  return clblasDtrmv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCtrmv.
template <>
clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasCtrmv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZtrmv.
template <>
clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasZtrmv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: runs the float specialization on converted buffers and writes 'x_buffer' back.
template <>
clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto status = clblasXtrmv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer_bis, a_offset, a_ld,
      x_buffer_bis, x_offset, x_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
// Same shape as the TRMV wrappers, with the extra 'k' argument passed through.
template <typename T>
clblasStatus clblasXtbmv(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision real: forwards to clblasStbmv.
template <>
clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, n);
  return clblasStbmv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDtbmv.
template <>
clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, n);
  return clblasDtbmv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCtbmv.
template <>
clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasCtbmv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZtbmv.
template <>
clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasZtbmv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: runs the float specialization on converted buffers and writes 'x_buffer' back.
template <>
clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto status = clblasXtbmv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer_bis, a_offset, a_ld,
      x_buffer_bis, x_offset, x_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
// Same shape as the TRMV wrappers, but the matrix is passed as 'ap_buffer' with an offset only.
template <typename T>
clblasStatus clblasXtpmv(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<T>& ap_buffer, const size_t ap_offset,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision real: forwards to clblasStpmv.
template <>
clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float>& ap_buffer, const size_t ap_offset,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float>(context, n);
  return clblasStpmv(layout, triangle, a_transpose, diagonal,
      n,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDtpmv.
template <>
clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double>& ap_buffer, const size_t ap_offset,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double>(context, n);
  return clblasDtpmv(layout, triangle, a_transpose, diagonal,
      n,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCtpmv.
template <>
clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float2>& ap_buffer, const size_t ap_offset,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<float2>(context, n);
  return clblasCtpmv(layout, triangle, a_transpose, diagonal,
      n,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZtpmv.
template <>
clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double2>& ap_buffer, const size_t ap_offset,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto queue = Queue(queues[0]);
  auto context = queue.GetContext();
  auto scratch_buffer = Buffer<double2>(context, n);
  return clblasZtpmv(layout, triangle, a_transpose, diagonal,
      n,
      ap_buffer(), ap_offset,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      scratch_buffer(),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Half precision: runs the float specialization on converted buffers and writes 'x_buffer' back.
template <>
clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<half>& ap_buffer, const size_t ap_offset,
    Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
  auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
  auto status = clblasXtpmv(layout, triangle, a_transpose, diagonal,
      n,
      ap_buffer_bis, ap_offset,
      x_buffer_bis, x_offset, x_inc,
      num_queues, queues, num_wait_events, wait_events, events);
  FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
  return status;
}

// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
// Only the explicit specializations below are defined; no scratch buffer is passed to clBLAS.
template <typename T>
clblasStatus clblasXtrsv(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision real: forwards to clblasStrsv.
template <>
clblasStatus clblasXtrsv<float>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasStrsv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDtrsv.
template <>
clblasStatus clblasXtrsv<double>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDtrsv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Single-precision complex: forwards to clblasCtrsv.
template <>
clblasStatus clblasXtrsv<float2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasCtrsv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision complex: forwards to clblasZtrsv.
template <>
clblasStatus clblasXtrsv<double2>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n,
    const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasZtrsv(layout, triangle, a_transpose, diagonal,
      n,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Forwards the clBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV
// Same shape as the TRSV wrappers, with the extra 'k' argument passed through.
template <typename T>
clblasStatus clblasXtbsv(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);

// Single-precision real: forwards to clblasStbsv.
template <>
clblasStatus clblasXtbsv<float>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasStbsv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

// Double-precision real: forwards to clblasDtbsv.
template <>
clblasStatus clblasXtbsv<double>(const clblasOrder layout, const clblasUplo triangle,
    const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t n, const size_t k,
    const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
    Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  return clblasDtbsv(layout, triangle, a_transpose, diagonal,
      n, k,
      a_buffer(), a_offset, a_ld,
      x_buffer(), x_offset, static_cast<int>(x_inc),
      num_queues, queues, num_wait_events, wait_events, events);
}

template <> clblasStatus clblasXtbsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const size_t k, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return
clblasCtbsv(layout, triangle, a_transpose, diagonal, n, k, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } template <> clblasStatus clblasXtbsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const size_t k, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZtbsv(layout, triangle, a_transpose, diagonal, n, k, a_buffer(), a_offset, a_ld, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV template clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const Buffer& ap_buffer, const size_t ap_offset, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); template <> clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const Buffer& ap_buffer, const size_t ap_offset, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasStpsv(layout, triangle, a_transpose, diagonal, n, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } template <> clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag 
diagonal, const size_t n, const Buffer& ap_buffer, const size_t ap_offset, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDtpsv(layout, triangle, a_transpose, diagonal, n, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } template <> clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const Buffer& ap_buffer, const size_t ap_offset, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCtpsv(layout, triangle, a_transpose, diagonal, n, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } template <> clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t n, const Buffer& ap_buffer, const size_t ap_offset, Buffer& x_buffer, const size_t x_offset, const size_t x_inc, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZtpsv(layout, triangle, a_transpose, diagonal, n, ap_buffer(), ap_offset, x_buffer(), x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SGER/DGER clblasStatus clblasXger(const clblasOrder layout, const size_t m, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue 
*queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSger(layout, m, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXger(const clblasOrder layout, const size_t m, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDger(layout, m, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXger(const clblasOrder layout, const size_t m, const size_t n, const half alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]); auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto status = clblasXger(layout, m, n, HalfToFloat(alpha), x_buffer_bis, x_offset, x_inc, y_buffer_bis, y_offset, y_inc, a_buffer_bis, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for CGERU/ZGERU clblasStatus clblasXgeru(const clblasOrder layout, const size_t m, const size_t n, const float2 alpha, const Buffer& x_buffer, const size_t x_offset, const 
size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCgeru(layout, m, n, cl_float2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgeru(const clblasOrder layout, const size_t m, const size_t n, const double2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZgeru(layout, m, n, cl_double2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CGERC/ZGERC clblasStatus clblasXgerc(const clblasOrder layout, const size_t m, const size_t n, const float2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCgerc(layout, m, n, cl_float2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgerc(const clblasOrder layout, const size_t m, const size_t n, const double2 alpha, const 
Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZgerc(layout, m, n, cl_double2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHER/ZHER clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCher(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZher(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHPR/ZHPR clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint 
num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChpr(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhpr(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHER2/ZHER2 clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCher2(layout, triangle, n, cl_float2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZher2(layout, triangle, n, cl_double2{{alpha.real(), alpha.imag()}}, 
x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for CHPR2/ZHPR2 clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChpr2(layout, triangle, n, cl_float2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double2 alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhpr2(layout, triangle, n, cl_double2{{alpha.real(), alpha.imag()}}, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SSYR/DSYR clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsyr(layout, triangle, n, alpha, 
x_buffer(), x_offset, static_cast(x_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDsyr(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const half alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto status = clblasXsyr(layout, triangle, n, HalfToFloat(alpha), x_buffer_bis, x_offset, x_inc, a_buffer_bis, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for SSPR/DSPR clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSspr(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXspr(const 
clblasOrder layout, const clblasUplo triangle, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDspr(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, const size_t n, const half alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]); auto status = clblasXspr(layout, triangle, n, HalfToFloat(alpha), x_buffer_bis, x_offset, x_inc, ap_buffer_bis, ap_offset, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for SSYR2/DSYR2 clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsyr2(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const 
double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDsyr2(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), a_buffer(), a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const half alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& a_buffer, const size_t a_offset, const size_t a_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]); auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto status = clblasXsyr2(layout, triangle, n, HalfToFloat(alpha), x_buffer_bis, x_offset, x_inc, y_buffer_bis, y_offset, y_inc, a_buffer_bis, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for SSPR2/DSPR2 clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const float alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSspr2(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), 
y_offset, static_cast(y_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const double alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDspr2(layout, triangle, n, alpha, x_buffer(), x_offset, static_cast(x_inc), y_buffer(), y_offset, static_cast(y_inc), ap_buffer(), ap_offset, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, const size_t n, const half alpha, const Buffer& x_buffer, const size_t x_offset, const size_t x_inc, const Buffer& y_buffer, const size_t y_offset, const size_t y_inc, Buffer& ap_buffer, const size_t ap_offset, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]); auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]); auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]); auto status = clblasXspr2(layout, triangle, n, HalfToFloat(alpha), x_buffer_bis, x_offset, x_inc, y_buffer_bis, y_offset, y_inc, ap_buffer_bis, ap_offset, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]); return status; } // ================================================================================================= // BLAS level-3 (matrix-matrix) routines // ================================================================================================= // Forwards the clBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose 
a_transpose, const clblasTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSgemm(layout, a_transpose, b_transpose, m, n, k, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDgemm(layout, a_transpose, b_transpose, m, n, k, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, const size_t m, const size_t n, const size_t k, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCgemm(layout, a_transpose, b_transpose, m, n, k, 
cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_float2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, const size_t m, const size_t n, const size_t k, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZgemm(layout, a_transpose, b_transpose, m, n, k, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_double2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, const size_t m, const size_t n, const size_t k, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const half beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]); auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]); auto status = clblasXgemm(layout, a_transpose, b_transpose, m, n, k, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, b_buffer_bis, b_offset, b_ld, HalfToFloat(beta), c_buffer_bis, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); 
FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsymm(layout, side, triangle, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDsymm(layout, side, triangle, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return 
clblasCsymm(layout, side, triangle, m, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_float2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZsymm(layout, side, triangle, m, n, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_double2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const half beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]); auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]); auto status = clblasXsymm(layout, side, triangle, m, n, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, b_buffer_bis, b_offset, b_ld, HalfToFloat(beta), c_buffer_bis, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]); return 
status; } // Forwards the clBLAS calls for CHEMM/ZHEMM clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasChemm(layout, side, triangle, m, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_float2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const size_t m, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZhemm(layout, side, triangle, m, n, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_double2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint 
num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsyrk(layout, triangle, a_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const double beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDsyrk(layout, triangle, a_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const float2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCsyrk(layout, triangle, a_transpose, n, k, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, cl_float2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const double2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return 
clblasZsyrk(layout, triangle, a_transpose, n, k, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, cl_double2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const half beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]); auto status = clblasXsyrk(layout, triangle, a_transpose, n, k, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, HalfToFloat(beta), c_buffer_bis, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for CHERK/ZHERK clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCherk(layout, triangle, a_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const size_t n, const size_t k, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const double beta, 
Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZherk(layout, triangle, a_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasSsyr2k(layout, triangle, ab_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDsyr2k(layout, triangle, ab_transpose, n, k, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, 
const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCsyr2k(layout, triangle, ab_transpose, n, k, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_float2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZsyr2k(layout, triangle, ab_transpose, n, k, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, cl_double2{{beta.real(), beta.imag()}}, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const half beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto b_buffer_bis = 
HalfToFloatBuffer(b_buffer, queues[0]); auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]); auto status = clblasXsyr2k(layout, triangle, ab_transpose, n, k, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, b_buffer_bis, b_offset, b_ld, HalfToFloat(beta), c_buffer_bis, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for CHER2K/ZHER2K clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const float beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCher2k(layout, triangle, ab_transpose, n, k, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, const size_t n, const size_t k, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, const Buffer& b_buffer, const size_t b_offset, const size_t b_ld, const double beta, Buffer& c_buffer, const size_t c_offset, const size_t c_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZher2k(layout, triangle, ab_transpose, n, k, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, beta, c_buffer(), c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } // Forwards the clBLAS calls 
for STRMM/DTRMM/CTRMM/ZTRMM clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasStrmm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDtrmm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCtrmm(layout, side, triangle, a_transpose, diagonal, m, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, 
num_wait_events, wait_events, events); } clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZtrmm(layout, side, triangle, a_transpose, diagonal, m, n, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const half alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]); auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]); auto status = clblasXtrmm(layout, side, triangle, a_transpose, diagonal, m, n, HalfToFloat(alpha), a_buffer_bis, a_offset, a_ld, b_buffer_bis, b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); FloatToHalfBuffer(b_buffer, b_buffer_bis, queues[0]); return status; } // Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const float alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, 
cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasStrsm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const double alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasDtrsm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const float2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasCtrsm(layout, side, triangle, a_transpose, diagonal, m, n, cl_float2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, const size_t m, const size_t n, const double2 alpha, const Buffer& a_buffer, const size_t a_offset, const size_t a_ld, Buffer& b_buffer, const size_t b_offset, const size_t 
b_ld, cl_uint num_queues, cl_command_queue *queues, cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { return clblasZtrsm(layout, side, triangle, a_transpose, diagonal, m, n, cl_double2{{alpha.real(), alpha.imag()}}, a_buffer(), a_offset, a_ld, b_buffer(), b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } // ================================================================================================= } // namespace clblast // CLBLAST_TEST_WRAPPER_CLBLAS_H_ #endif