// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file implements all the BLAS API calls. In all cases, it does not much more than creating // a new object of the appropriate type, and calling the main routine on that object. It forwards // all status codes to the caller. // // ================================================================================================= #include #include "clblast.h" #include "internal/public_api.h" // BLAS level-1 includes #include "internal/routines/level1/xswap.h" #include "internal/routines/level1/xscal.h" #include "internal/routines/level1/xcopy.h" #include "internal/routines/level1/xaxpy.h" #include "internal/routines/level1/xdot.h" #include "internal/routines/level1/xdotu.h" #include "internal/routines/level1/xdotc.h" // BLAS level-2 includes #include "internal/routines/level2/xgemv.h" #include "internal/routines/level2/xgbmv.h" #include "internal/routines/level2/xhemv.h" #include "internal/routines/level2/xhbmv.h" #include "internal/routines/level2/xhpmv.h" #include "internal/routines/level2/xsymv.h" #include "internal/routines/level2/xsbmv.h" #include "internal/routines/level2/xspmv.h" #include "internal/routines/level2/xtrmv.h" #include "internal/routines/level2/xtbmv.h" #include "internal/routines/level2/xtpmv.h" #include "internal/routines/level2/xger.h" #include "internal/routines/level2/xgeru.h" #include "internal/routines/level2/xgerc.h" #include "internal/routines/level2/xher.h" #include "internal/routines/level2/xhpr.h" #include "internal/routines/level2/xher2.h" #include "internal/routines/level2/xhpr2.h" #include "internal/routines/level2/xsyr.h" #include "internal/routines/level2/xspr.h" #include "internal/routines/level2/xsyr2.h" #include "internal/routines/level2/xspr2.h" // BLAS level-3 includes #include "internal/routines/level3/xgemm.h" #include "internal/routines/level3/xsymm.h" #include "internal/routines/level3/xhemm.h" #include "internal/routines/level3/xsyrk.h" #include "internal/routines/level3/xherk.h" #include "internal/routines/level3/xsyr2k.h" #include "internal/routines/level3/xher2k.h" #include "internal/routines/level3/xtrmm.h" namespace clblast { // ================================================================================================= // BLAS level-1 (vector-vector) routines // ================================================================================================= // Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP template StatusCode Swap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xswap(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSwap(n, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Swap(const size_t, cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Swap(const size_t, cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Swap(const size_t, cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Swap(const size_t, cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL template StatusCode Scal(const size_t n, const T alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xscal(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoScal(n, alpha, Buffer(x_buffer), x_offset, x_inc); } template StatusCode PUBLIC_API Scal(const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Scal(const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Scal(const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Scal(const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY template StatusCode Copy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xcopy(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoCopy(n, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Copy(const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Copy(const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Copy(const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Copy(const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY template StatusCode Axpy(const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xaxpy(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoAxpy(n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Axpy(const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Axpy(const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Axpy(const size_t, const float2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Axpy(const size_t, const double2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Dot product of two vectors: SDOT/DDOT template StatusCode Dot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xdot(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoDot(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Dot(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Dot(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Dot product of two complex vectors: CDOTU/ZDOTU template StatusCode Dotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xdotu(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoDotu(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Dotu(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Dotu(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC template StatusCode Dotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xdotc(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoDotc(n, Buffer(dot_buffer), dot_offset, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Dotc(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Dotc(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= // General matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV template StatusCode Gemv(const Layout layout, const Transpose a_transpose, const size_t m, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xgemv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGemv(layout, a_transpose, m, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemv(const Layout, const Transpose, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV template StatusCode Gbmv(const Layout layout, const Transpose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xgbmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGbmv(layout, a_transpose, m, n, kl, ku, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gbmv(const Layout, const Transpose, const size_t, const size_t, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian matrix-vector multiplication: CHEMV/ZHEMV template StatusCode Hemv(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhemv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHemv(layout, triangle, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Hemv(const Layout, const Triangle, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hemv(const Layout, const Triangle, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV template StatusCode Hbmv(const Layout layout, const Triangle triangle, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhbmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHbmv(layout, triangle, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Hbmv(const Layout, const Triangle, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hbmv(const Layout, const Triangle, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV template StatusCode Hpmv(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhpmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHpmv(layout, triangle, n, alpha, Buffer(ap_buffer), ap_offset, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Hpmv(const Layout, const Triangle, const size_t, const float2, const cl_mem, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hpmv(const Layout, const Triangle, const size_t, const double2, const cl_mem, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric matrix-vector multiplication: SSYMV/DSYMV template StatusCode Symv(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsymv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSymv(layout, triangle, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Symv(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Symv(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric banded matrix-vector multiplication: SSBMV/DSBMV template StatusCode Sbmv(const Layout layout, const Triangle triangle, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsbmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSbmv(layout, triangle, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Sbmv(const Layout, const Triangle, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV template StatusCode Spmv(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xspmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSpmv(layout, triangle, n, alpha, Buffer(ap_buffer), ap_offset, Buffer(x_buffer), x_offset, x_inc, beta, Buffer(y_buffer), y_offset, y_inc); } template StatusCode PUBLIC_API Spmv(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Spmv(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV template StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xtrmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoTrmv(layout, triangle, a_transpose, diagonal, n, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc); } template StatusCode PUBLIC_API Trmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV template StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xtbmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoTbmv(layout, triangle, a_transpose, diagonal, n, k, Buffer(a_buffer), a_offset, a_ld, Buffer(x_buffer), x_offset, x_inc); } template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV template StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xtpmv(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoTpmv(layout, triangle, a_transpose, diagonal, n, Buffer(ap_buffer), ap_offset, Buffer(x_buffer), x_offset, x_inc); } template StatusCode PUBLIC_API Tpmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpmv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV template StatusCode Trsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*) { return StatusCode::kNotImplemented; } template StatusCode PUBLIC_API Trsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV template StatusCode Tbsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*) { return StatusCode::kNotImplemented; } template StatusCode PUBLIC_API Tbsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tbsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV template StatusCode Tpsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*) { return StatusCode::kNotImplemented; } template StatusCode PUBLIC_API Tpsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Tpsv(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, const cl_mem, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // General rank-1 matrix update: SGER/DGER template StatusCode Ger(const Layout layout, const size_t m, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xger(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGer(layout, m, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Ger(const Layout, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Ger(const Layout, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // General rank-1 complex matrix update: CGERU/ZGERU template StatusCode Geru(const Layout layout, const size_t m, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xgeru(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGeru(layout, m, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Geru(const Layout, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Geru(const Layout, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // General rank-1 complex conjugated matrix update: CGERC/ZGERC template StatusCode Gerc(const Layout layout, const size_t m, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xgerc(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGerc(layout, m, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Gerc(const Layout, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gerc(const Layout, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian rank-1 matrix update: CHER/ZHER template StatusCode Her(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xher,T>(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHer(layout, triangle, n, alpha, Buffer>(x_buffer), x_offset, x_inc, Buffer>(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Her(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Her(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian packed rank-1 matrix update: CHPR/ZHPR template StatusCode Hpr(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhpr,T>(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHpr(layout, triangle, n, alpha, Buffer>(x_buffer), x_offset, x_inc, Buffer>(ap_buffer), ap_offset); } template StatusCode PUBLIC_API Hpr(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hpr(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); // Hermitian rank-2 matrix update: CHER2/ZHER2 template StatusCode Her2(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xher2(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHer2(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Her2(const Layout, const Triangle, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Her2(const Layout, const Triangle, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 template StatusCode Hpr2(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhpr2(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHpr2(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(ap_buffer), ap_offset); } template StatusCode PUBLIC_API Hpr2(const Layout, const Triangle, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hpr2(const Layout, const Triangle, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); // Symmetric rank-1 matrix update: SSYR/DSYR template StatusCode Syr(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsyr(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSyr(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Syr(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syr(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric packed rank-1 matrix update: SSPR/DSPR template StatusCode Spr(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xspr(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSpr(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(ap_buffer), ap_offset); } template StatusCode PUBLIC_API Spr(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Spr(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); // Symmetric rank-2 matrix update: SSYR2/DSYR2 template StatusCode Syr2(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsyr2(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSyr2(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(a_buffer), a_offset, a_ld); } template StatusCode PUBLIC_API Syr2(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syr2(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric packed rank-2 matrix update: SSPR2/DSPR2 template StatusCode Spr2(const Layout layout, const Triangle triangle, const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xspr2(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSpr2(layout, triangle, n, alpha, Buffer(x_buffer), x_offset, x_inc, Buffer(y_buffer), y_offset, y_inc, Buffer(ap_buffer), ap_offset); } template StatusCode PUBLIC_API Spr2(const Layout, const Triangle, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Spr2(const Layout, const Triangle, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); // ================================================================================================= // BLAS level-3 (matrix-matrix) routines // ================================================================================================= // General matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM template StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const size_t m, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xgemm(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoGemm(layout, a_transpose, b_transpose, m, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Gemm(const Layout, const Transpose, const Transpose, const size_t, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemm(const Layout, const Transpose, const Transpose, const size_t, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemm(const Layout, const Transpose, const Transpose, const size_t, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Gemm(const Layout, const Transpose, const Transpose, const size_t, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM template StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsymm(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSymm(layout, side, triangle, m, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Symm(const Layout, const Side, const Triangle, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Symm(const Layout, const Side, const Triangle, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Symm(const Layout, const Side, const Triangle, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Symm(const Layout, const Side, const Triangle, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Hermitian matrix-matrix multiplication: CHEMM/ZHEMM template StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, const size_t m, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xhemm(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHemm(layout, side, triangle, m, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Hemm(const Layout, const Side, const Triangle, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Hemm(const Layout, const Side, const Triangle, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK template StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsyrk(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSyrk(layout, triangle, a_transpose, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Syrk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syrk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syrk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syrk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Rank-K update of a hermitian matrix: CHERK/ZHERK template StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xherk,T>(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHerk(layout, triangle, a_transpose, n, k, alpha, Buffer>(a_buffer), a_offset, a_ld, beta, Buffer>(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Herk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Herk(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K template StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xsyr2k(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoSyr2k(layout, triangle, ab_transpose, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Syr2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syr2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syr2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Syr2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Rank-2K update of a hermitian matrix: CHER2K/ZHER2K template StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const size_t n, const size_t k, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const U beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xher2k(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoHer2k(layout, triangle, ab_transpose, n, k, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld, beta, Buffer(c_buffer), c_offset, c_ld); } template StatusCode PUBLIC_API Her2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const float, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Her2k(const Layout, const Triangle, const Transpose, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, const double, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM template StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const size_t m, const size_t n, const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld, cl_command_queue* queue, cl_event* event) { auto queue_cpp = Queue(*queue); auto event_cpp = Event(*event); auto routine = Xtrmm(queue_cpp, event_cpp); auto status = routine.SetUp(); if (status != StatusCode::kSuccess) { return status; } return routine.DoTrmm(layout, side, triangle, a_transpose, diagonal, m, n, alpha, Buffer(a_buffer), a_offset, a_ld, Buffer(b_buffer), b_offset, b_ld); } template StatusCode PUBLIC_API Trmm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trmm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM template StatusCode Trsm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const T, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*) { return StatusCode::kNotImplemented; } template StatusCode PUBLIC_API Trsm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const float, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const double, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const float2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); template StatusCode PUBLIC_API Trsm(const Layout, const Side, const Triangle, const Transpose, const Diagonal, const size_t, const size_t, const double2, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); // ================================================================================================= } // namespace clblast