// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file contains the Netlib CBLAS implementations to the CLBlast BLAS routines, performing buffer // copies automatically and running on the default OpenCL platform and device. For full control over // performance, it is advised to use the regular clblast.h or clblast_c.h headers instead. // // ================================================================================================= #include #include "clblast_blas.h" #include "clblast.h" #include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= // Helper function to get a default OpenCL platform and device Device get_device() { auto platform_id = ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}); auto device_id = ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}); auto platform = Platform(platform_id); return Device(platform, device_id); } // ================================================================================================= // BLAS level-1 (vector-vector) routines // ================================================================================================= // ROTG void cblas_srotg(float* sa, float* sb, float* sc, float* ss) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto sa_size = 1; auto sa_buffer = Buffer(context, sa_size); const auto sb_size = 1; auto sb_buffer = Buffer(context, sb_size); const auto sc_size = 1; auto sc_buffer = Buffer(context, sc_size); const auto ss_size = 1; auto ss_buffer = Buffer(context, ss_size); 
sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); auto queue_cl = queue(); auto s = Rotg(sa_buffer(), 0, sb_buffer(), 0, sc_buffer(), 0, ss_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); } void cblas_drotg(double* sa, double* sb, double* sc, double* ss) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto sa_size = 1; auto sa_buffer = Buffer(context, sa_size); const auto sb_size = 1; auto sb_buffer = Buffer(context, sb_size); const auto sc_size = 1; auto sc_buffer = Buffer(context, sc_size); const auto ss_size = 1; auto ss_buffer = Buffer(context, ss_size); sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); auto queue_cl = queue(); auto s = Rotg(sa_buffer(), 0, sb_buffer(), 0, sc_buffer(), 0, ss_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); } // ROTMG void cblas_srotmg(float* sd1, float* sd2, float* sx1, const float* sy1, float* sparam) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto sy1_size = 1; auto sy1_buffer = 
Buffer(context, sy1_size); const auto sd1_size = 1; auto sd1_buffer = Buffer(context, sd1_size); const auto sd2_size = 1; auto sd2_buffer = Buffer(context, sd2_size); const auto sx1_size = 1; auto sx1_buffer = Buffer(context, sx1_size); const auto sparam_size = 1; auto sparam_buffer = Buffer(context, sparam_size); sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1)); sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = Rotmg(sd1_buffer(), 0, sd2_buffer(), 0, sx1_buffer(), 0, sy1_buffer(), 0, sparam_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } void cblas_drotmg(double* sd1, double* sd2, double* sx1, const double* sy1, double* sparam) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto sy1_size = 1; auto sy1_buffer = Buffer(context, sy1_size); const auto sd1_size = 1; auto sd1_buffer = Buffer(context, sd1_size); const auto sd2_size = 1; auto sd2_buffer = Buffer(context, sd2_size); const auto sx1_size = 1; auto sx1_buffer = Buffer(context, sx1_size); const auto sparam_size = 1; auto sparam_buffer = Buffer(context, sparam_size); sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1)); sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = 
Rotmg(sd1_buffer(), 0, sd2_buffer(), 0, sx1_buffer(), 0, sy1_buffer(), 0, sparam_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } // ROT void cblas_srot(const int n, float* x, const int x_inc, float* y, const int y_inc, const float cos, const float sin) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Rot(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, cos, sin, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_drot(const int n, double* x, const int x_inc, double* y, const int y_inc, const double cos, const double sin) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Rot(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, cos, sin, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, 
reinterpret_cast(y)); } // ROTM void cblas_srotm(const int n, float* x, const int x_inc, float* y, const int y_inc, float* sparam) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto sparam_size = 1; auto sparam_buffer = Buffer(context, sparam_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = Rotm(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, sparam_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } void cblas_drotm(const int n, double* x, const int x_inc, double* y, const int y_inc, double* sparam) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto sparam_size = 1; auto sparam_buffer = Buffer(context, sparam_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = Rotm(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, sparam_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } // SWAP void 
cblas_sswap(const int n, float* x, const int x_inc, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dswap(const int n, double* x, const int x_inc, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cswap(const int n, void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { 
throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zswap(const int n, void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SCAL void cblas_sscal(const int n, const float alpha, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dscal(const int n, const double alpha, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + 
ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_cscal(const int n, const void* alpha, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_zscal(const int n, const void* alpha, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // COPY void cblas_scopy(const int n, const float* x, const int x_inc, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, 
reinterpret_cast(y)); } void cblas_dcopy(const int n, const double* x, const int x_inc, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_ccopy(const int n, const void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zcopy(const int n, const void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error 
code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // AXPY void cblas_saxpy(const int n, const float alpha, const float* x, const int x_inc, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_daxpy(const int n, const double alpha, const double* x, const int x_inc, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_caxpy(const int n, const void* alpha, const void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); 
x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zaxpy(const int n, const void* alpha, const void* x, const int x_inc, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // DOT void cblas_sdot(const int n, float* dot, const float* x, const int x_inc, const float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dot(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, 
dot_size, reinterpret_cast(dot)); } void cblas_ddot(const int n, double* dot, const double* x, const int x_inc, const double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dot(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } // DOTU void cblas_cdotu(const int n, void* dot, const void* x, const int x_inc, const void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dotu(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } void cblas_zdotu(const int n, void* dot, const void* x, const int x_inc, const void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = 
Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dotu(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } // DOTC void cblas_cdotc(const int n, void* dot, const void* x, const int x_inc, const void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dotc(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } void cblas_zdotc(const int n, void* dot, const void* x, const int x_inc, const void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto dot_size = 1; auto dot_buffer = Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); dot_buffer.Write(queue, 
dot_size, reinterpret_cast(dot)); auto queue_cl = queue(); auto s = Dotc(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } // NRM2 void cblas_snrm2(const int n, float* nrm2, const float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto nrm2_size = 1; auto nrm2_buffer = Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); auto queue_cl = queue(); auto s = Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); } void cblas_dnrm2(const int n, double* nrm2, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto nrm2_size = 1; auto nrm2_buffer = Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); auto queue_cl = queue(); auto s = Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); } void cblas_scnrm2(const int n, void* nrm2, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto nrm2_size = 1; auto nrm2_buffer = 
Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); auto queue_cl = queue(); auto s = Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); } void cblas_dznrm2(const int n, void* nrm2, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto nrm2_size = 1; auto nrm2_buffer = Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); auto queue_cl = queue(); auto s = Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); } // ASUM void cblas_sasum(const int n, float* asum, const float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto asum_size = 1; auto asum_buffer = Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); auto queue_cl = queue(); auto s = Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); } void cblas_dasum(const int n, double* asum, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = 
n; auto x_buffer = Buffer(context, x_size); const auto asum_size = 1; auto asum_buffer = Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); auto queue_cl = queue(); auto s = Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); } void cblas_scasum(const int n, void* asum, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto asum_size = 1; auto asum_buffer = Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); auto queue_cl = queue(); auto s = Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); } void cblas_dzasum(const int n, void* asum, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto asum_size = 1; auto asum_buffer = Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); auto queue_cl = queue(); auto s = Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); } // SUM void cblas_ssum(const int n, float* sum, const float* x, const int x_inc) { auto device = get_device(); auto 
context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto sum_size = 1; auto sum_buffer = Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); auto queue_cl = queue(); auto s = Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); } void cblas_dsum(const int n, double* sum, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto sum_size = 1; auto sum_buffer = Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); auto queue_cl = queue(); auto s = Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); } void cblas_scsum(const int n, void* sum, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto sum_size = 1; auto sum_buffer = Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); auto queue_cl = queue(); auto s = Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); } void cblas_dzsum(const int n, void* sum, const void* x, const int 
// Review notes for the long physical line that follows (code left untouched):
//  * It starts in the parameter list of cblas_dzsum (opened on the previous physical line) and
//    holds that function's whole body, which forwards to Sum().
//  * After the "// AMAX" marker: cblas_isamax (float*) is complete; cblas_idamax (double*) runs
//    up to its final Read() call, which is finished on the next physical line.
// Steps visible in the AMAX wrappers: get_device() -> Context -> Queue; a Buffer of size n for x
// and of size 1 for imax; Write() of both host arrays; Amax(n, imax, 0, x, 0, x_inc, &queue_cl);
// std::runtime_error when the status is not StatusCode::kSuccess; Read() of imax to the host.
// NOTE(review): imax is received through a float*/double* parameter; whether the routine stores
// an index or a value there cannot be told from this text -- confirm the intended element type.
// NOTE(review): x_size is n while x_inc is passed on -- confirm n suffices when x_inc > 1.
// NOTE(review): template arguments are absent and "// AMAX" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto sum_size = 1; auto sum_buffer = Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); auto queue_cl = queue(); auto s = Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); } // AMAX void cblas_isamax(const int n, float* imax, const float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } void cblas_idamax(const int n, double* imax, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size,
// Review notes for the long physical line that follows (code left untouched):
//  * It starts with the last argument of the Read() call that closes cblas_idamax.
//  * cblas_icamax and cblas_izamax (void* arguments) are complete and forward to Amax().
//  * After the "// MAX" marker cblas_ismax begins and forwards to Max(); its error branch is cut
//    right after `throw` and continues on the next physical line.
// Steps visible in each wrapper: get_device() -> Context -> Queue; a Buffer of size n for x and
// of size 1 for imax; Write() of both host arrays; the routine call with both offsets 0;
// std::runtime_error when the status is not StatusCode::kSuccess; Read() of imax to the host.
// NOTE(review): x_size is n while x_inc is passed on -- confirm n suffices when x_inc > 1.
// NOTE(review): template arguments are absent and "// MAX" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
reinterpret_cast(imax)); } void cblas_icamax(const int n, void* imax, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } void cblas_izamax(const int n, void* imax, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } // MAX void cblas_ismax(const int n, float* imax, const float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw
// Review notes for the long physical line that follows (code left untouched):
//  * It starts with the exception object thrown by cblas_ismax and then closes that function.
//  * cblas_idmax (double*) and cblas_icmax (void*) are complete and forward to Max().
//  * cblas_izmax (void*) runs up to the first arguments of its Max() call, which is finished on
//    the next physical line.
// Steps visible in each wrapper: get_device() -> Context -> Queue; a Buffer of size n for x and
// of size 1 for imax; Write() of both host arrays; Max(n, imax, 0, x, 0, x_inc, &queue_cl);
// std::runtime_error when the status is not StatusCode::kSuccess; Read() of imax to the host.
// NOTE(review): x_size is n while x_inc is passed on -- confirm n suffices when x_inc > 1.
// NOTE(review): template arguments of Buffer/reinterpret_cast are absent in this text --
// presumably lost in extraction; confirm against the canonical file.
std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } void cblas_idmax(const int n, double* imax, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } void cblas_icmax(const int n, void* imax, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } void cblas_izmax(const int n, void* imax, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imax_size = 1; auto imax_buffer = Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); auto queue_cl = queue(); auto s = Max(n, imax_buffer(),
// Review notes for the long physical line that follows (code left untouched):
//  * It starts with the remaining arguments of the Max() call in cblas_izmax and then closes
//    that function.
//  * After the "// MIN" marker: cblas_ismin (float*) and cblas_idmin (double*) are complete and
//    forward to Min(); cblas_icmin (void*) is cut inside its second Write() call and continues
//    on the next physical line.
// Steps visible in each MIN wrapper: get_device() -> Context -> Queue; a Buffer of size n for x
// and of size 1 for imin; Write() of both host arrays; Min(n, imin, 0, x, 0, x_inc, &queue_cl);
// std::runtime_error when the status is not StatusCode::kSuccess; Read() of imin to the host.
// NOTE(review): x_size is n while x_inc is passed on -- confirm n suffices when x_inc > 1.
// NOTE(review): template arguments are absent and "// MIN" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); } // MIN void cblas_ismin(const int n, float* imin, const float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imin_size = 1; auto imin_buffer = Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); auto queue_cl = queue(); auto s = Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); } void cblas_idmin(const int n, double* imin, const double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imin_size = 1; auto imin_buffer = Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); auto queue_cl = queue(); auto s = Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); } void cblas_icmin(const int n, void* imin, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imin_size = 1; auto imin_buffer = Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imin_buffer.Write(queue,
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside the second Write() call of cblas_icmin and finishes that function.
//  * cblas_izmin (void* arguments) is complete and forwards to Min().
//  * The banner for the BLAS level-2 (matrix-vector) routines and the "// GEMV" marker follow.
//  * cblas_sgemv begins: it copies alpha/beta into alpha_cpp/beta_cpp, creates Buffers a, x and
//    y, uploads all three and fetches queue_cl; the Gemv() call is on the next physical line.
// NOTE(review): in cblas_sgemv a_size, x_size and y_size are all n, although the matrix is
// described by m, n and a_ld and the vectors by x_inc/y_inc -- this looks under-sized; confirm
// the intended sizes before relying on it.
// NOTE(review): template arguments are absent and the banner/marker comments sit mid-line --
// presumably lost '<...>' parts and line breaks; confirm against the canonical file.
imin_size, reinterpret_cast(imin)); auto queue_cl = queue(); auto s = Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); } void cblas_izmin(const int n, void* imin, const void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto imin_size = 1; auto imin_buffer = Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); auto queue_cl = queue(); auto s = Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); } // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= // GEMV void cblas_sgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue();
// Review notes for the long physical line that follows (code left untouched):
//  * It starts with the Gemv() call of cblas_sgemv (set up on the previous physical line), the
//    status check and the Read() of y, and closes that function.
//  * cblas_dgemv (double scalars and pointers) is complete.
//  * cblas_cgemv begins: alpha_cpp and beta_cpp are built as float2 from elements [0] and [1]
//    read through the alpha/beta pointers; the function continues on the next physical line.
// Steps visible in the complete wrapper: get_device() -> Context -> Queue; Buffers a, x and y;
// Write() of all three host arrays; Gemv(layout, a_transpose, m, n, alpha, a, 0, a_ld, x, 0,
// x_inc, beta, y, 0, y_inc, &queue_cl); std::runtime_error when the status is not
// StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is described by m, n
// and a_ld and the vectors by x_inc/y_inc -- this looks under-sized; confirm intended sizes.
// NOTE(review): template arguments of Buffer/reinterpret_cast/static_cast are absent in this
// text -- presumably lost in extraction; confirm against the canonical file.
auto s = Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside cblas_cgemv (at the creation of x_buffer) and finishes that function.
//  * cblas_zgemv is complete: alpha_cpp and beta_cpp are built as double2 from elements [0] and
//    [1] read through the alpha/beta pointers, then the same Gemv() sequence follows.
//  * After the "// GBMV" marker the signature of cblas_sgbmv starts; it continues on the next
//    physical line.
// Steps visible in the complete wrapper: get_device() -> Context -> Queue; Buffers a, x and y;
// Write() of all three host arrays; Gemv(...) with all offsets 0; std::runtime_error when the
// status is not StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is described by m, n
// and a_ld and the vectors by x_inc/y_inc -- this looks under-sized; confirm intended sizes.
// NOTE(review): template arguments are absent and "// GBMV" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // GBMV void cblas_sgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const float alpha, const float* a, const int a_ld, const float*
// Review notes for the long physical line that follows (code left untouched):
//  * It starts in the parameter list of cblas_sgbmv (opened on the previous physical line) and
//    holds that function's whole body.
//  * cblas_dgbmv (double scalars and pointers) follows; its error branch is cut after the `+` of
//    the exception message and is finished on the next physical line.
// Steps visible in each wrapper: get_device() -> Context -> Queue; alpha/beta copied into
// alpha_cpp/beta_cpp; Buffers a, x and y; Write() of all three host arrays; Gbmv(layout,
// a_transpose, m, n, kl, ku, alpha, a, 0, a_ld, x, 0, x_inc, beta, y, 0, y_inc, &queue_cl);
// std::runtime_error when the status is not StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is described by m, n,
// kl, ku and a_ld and the vectors by x_inc/y_inc -- this looks under-sized; confirm.
// NOTE(review): template arguments of Buffer/reinterpret_cast/static_cast are absent in this
// text -- presumably lost in extraction; confirm against the canonical file.
x, const int x_inc, const float beta, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " +
// Review notes for the long physical line that follows (code left untouched):
//  * It starts with the end of the exception message in cblas_dgbmv, then the Read() of y, and
//    closes that function.
//  * cblas_cgbmv is complete: alpha_cpp and beta_cpp are built as float2 from elements [0] and
//    [1] read through the alpha/beta pointers before the Gbmv() sequence.
//  * cblas_zgbmv does the same with double2; it is cut inside the creation of y_buffer and
//    continues on the next physical line.
// Steps visible in the complete wrapper: get_device() -> Context -> Queue; Buffers a, x and y;
// Write() of all three host arrays; Gbmv(...) with all offsets 0; std::runtime_error when the
// status is not StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is described by m, n,
// kl, ku and a_ld and the vectors by x_inc/y_inc -- this looks under-sized; confirm.
// NOTE(review): template arguments of Buffer/reinterpret_cast/static_cast are absent in this
// text -- presumably lost in extraction; confirm against the canonical file.
ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context,
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside cblas_zgbmv (end of the y_buffer creation) and finishes that function.
//  * After the "// HEMV" marker cblas_chemv is complete: alpha_cpp and beta_cpp are built as
//    float2 from elements [0] and [1] read through the alpha/beta pointers; Buffers a, x and y
//    are created and uploaded; Hemv(layout, triangle, n, alpha, a, 0, a_ld, x, 0, x_inc, beta,
//    y, 0, y_inc, &queue_cl) is called; y is read back.
//  * cblas_zhemv begins and is cut inside the Queue construction; it continues on the next
//    physical line.
// Any status other than StatusCode::kSuccess is turned into a std::runtime_error.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is passed with leading
// dimension a_ld and the vectors with x_inc/y_inc -- this looks under-sized; confirm.
// NOTE(review): template arguments are absent and "// HEMV" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HEMV void cblas_chemv(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hemv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhemv(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context,
// Review notes for the two long physical lines that follow (code left untouched; they are kept
// together because the break between them falls inside the exception-message string literal):
//  * The first line starts inside cblas_zhemv (end of the Queue construction) and finishes that
//    function, which builds double2 scalars and forwards to Hemv().
//  * After the "// HBMV" marker cblas_chbmv (float2 scalars) and cblas_zhbmv (double2 scalars)
//    are complete; both take the extra argument k and call Hbmv(layout, triangle, n, k, alpha,
//    a, 0, a_ld, x, 0, x_inc, beta, y, 0, y_inc, &queue_cl).
//  * After the "// HPMV" marker cblas_chpmv begins: it uses a Buffer named ap instead of a and
//    has no a_ld parameter; it is cut inside its second Write() call and continues on the next
//    physical line.
// Steps visible in each complete wrapper: get_device() -> Context -> Queue; alpha_cpp/beta_cpp
// built from elements [0] and [1] read through the alpha/beta pointers; three Buffers created
// and uploaded; the routine call with all offsets 0; std::runtime_error when the status is not
// StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): every buffer size is n, although the matrices are passed with a_ld (and k for
// HBMV) and the vectors with x_inc/y_inc -- this looks under-sized; confirm intended sizes.
// NOTE(review): template arguments are absent and the section markers sit mid-line --
// presumably lost '<...>' parts and line breaks; confirm against the canonical file.
device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hemv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HBMV void cblas_chbmv(const Layout layout, const Triangle triangle, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code
" + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhbmv(const Layout layout, const Triangle triangle, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HPMV void cblas_chpmv(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* ap, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue,
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside the second Write() call of cblas_chpmv and finishes that function.
//  * cblas_zhpmv is complete: double2 scalars built from elements [0] and [1] read through the
//    alpha/beta pointers; Buffers ap, x and y created and uploaded; Hpmv(layout, triangle, n,
//    alpha, ap, 0, x, 0, x_inc, beta, y, 0, y_inc, &queue_cl); y read back.
//  * After the "// SYMV" marker cblas_ssymv begins (float scalars copied into
//    alpha_cpp/beta_cpp); it is cut at the creation of a_buffer and continues on the next
//    physical line.
// Any status other than StatusCode::kSuccess is turned into a std::runtime_error.
// NOTE(review): ap_size, x_size and y_size are all n; ap is presumably a packed matrix, for
// which n looks too small, and the vectors carry x_inc/y_inc -- confirm intended sizes.
// NOTE(review): template arguments are absent and "// SYMV" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hpmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhpmv(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* ap, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Hpmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SYMV void cblas_ssymv(const Layout layout, const Triangle triangle, const int n, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer =
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside cblas_ssymv (the initialiser of a_buffer) and finishes that function.
//  * cblas_dsymv (double scalars and pointers) is complete: Buffers a, x and y created and
//    uploaded; Symv(layout, triangle, n, alpha, a, 0, a_ld, x, 0, x_inc, beta, y, 0, y_inc,
//    &queue_cl); y read back.
//  * After the "// SBMV" marker cblas_ssbmv begins (extra argument k); it is cut right after
//    `auto context =` and continues on the next physical line.
// Any status other than StatusCode::kSuccess is turned into a std::runtime_error.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is passed with leading
// dimension a_ld and the vectors with x_inc/y_inc -- this looks under-sized; confirm.
// NOTE(review): template arguments are absent and "// SBMV" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Symv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dsymv(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Symv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SBMV void cblas_ssbmv(const Layout layout, const Triangle triangle, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { auto device = get_device(); auto context =
// Review notes for the long physical line that follows (code left untouched):
//  * It starts inside cblas_ssbmv (the initialiser of `context`) and holds the rest of that
//    function.
//  * cblas_dsbmv (double scalars and pointers) is complete: Buffers a, x and y created and
//    uploaded; Sbmv(layout, triangle, n, k, alpha, a, 0, a_ld, x, 0, x_inc, beta, y, 0, y_inc,
//    &queue_cl); y read back.
//  * After the "// SPMV" marker the signature of cblas_sspmv starts; it continues on the next
//    physical line.
// Any status other than StatusCode::kSuccess is turned into a std::runtime_error.
// NOTE(review): a_size, x_size and y_size are all n, although the matrix is passed with k and
// a_ld and the vectors with x_inc/y_inc -- this looks under-sized; confirm intended sizes.
// NOTE(review): template arguments are absent and "// SPMV" sits mid-line -- presumably lost
// '<...>' parts and line breaks; confirm against the canonical file.
Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Sbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dsbmv(const Layout layout, const Triangle triangle, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Sbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SPMV void cblas_sspmv(const Layout layout, const Triangle triangle, const int n, const float alpha,
// Review notes for the long physical line that follows (code left untouched):
//  * It starts in the parameter list of cblas_sspmv (opened on the previous physical line) and
//    holds that function's whole body.
//  * cblas_dspmv (double scalars and pointers) is complete; the line ends exactly at its
//    closing brace.
// Steps visible in each wrapper: get_device() -> Context -> Queue; alpha/beta copied into
// alpha_cpp/beta_cpp; Buffers ap, x and y created and uploaded; Spmv(layout, triangle, n, alpha,
// ap, 0, x, 0, x_inc, beta, y, 0, y_inc, &queue_cl); std::runtime_error when the status is not
// StatusCode::kSuccess; Read() of y to the host.
// NOTE(review): ap_size, x_size and y_size are all n; ap is presumably a packed matrix, for
// which n looks too small, and the vectors carry x_inc/y_inc -- confirm intended sizes.
// NOTE(review): template arguments of Buffer/reinterpret_cast/static_cast are absent in this
// text -- presumably lost in extraction; confirm against the canonical file.
const float* ap, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Spmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dspmv(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* ap, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = Spmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); }
// TRMV void cblas_strmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const float* a, const int a_ld, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const double* a, const int a_ld, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = 
Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TBMV void cblas_stbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const float* a, const int a_ld, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, 
reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const double* a, const int a_ld, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, 
a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TPMV void cblas_stpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const float* ap, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void 
cblas_dtpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const double* ap, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto 
ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TRSV void cblas_strsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const float* a, const int a_ld, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const double* a, const int a_ld, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = 
queue(); auto s = Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned 
with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TBSV void cblas_stbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const float* a, const int a_ld, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const double* a, const int a_ld, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, 
const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TPSV void cblas_stpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const float* ap, float* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, 
ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const double* ap, double* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpsv(static_cast(layout), static_cast(triangle), 
static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); const auto x_size = n; auto x_buffer = Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // GER void cblas_sger(const Layout layout, const int m, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Ger(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw 
std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dger(const Layout layout, const int m, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Ger(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // GERU void cblas_cgeru(const Layout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Geru(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { 
throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zgeru(const Layout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Geru(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // GERC void cblas_cgerc(const Layout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Gerc(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, 
a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zgerc(const Layout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Gerc(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HER void cblas_cher(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Her(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + 
ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zher(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Her(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HPR void cblas_chpr(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, void* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Hpr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_zhpr(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, void* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto 
x_size = n; auto x_buffer = Buffer(context, x_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Hpr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // HER2 void cblas_cher2(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Her2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zher2(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; 
auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Her2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HPR2 void cblas_chpr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Hpr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_zhpr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto 
alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Hpr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // SYR void cblas_ssyr(const Layout layout, const Triangle triangle, const int n, const float alpha, const float* x, const int x_inc, float* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Syr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dsyr(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* x, const int x_inc, double* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto a_size = n; auto a_buffer = 
Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Syr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // SPR void cblas_sspr(const Layout layout, const Triangle triangle, const int n, const float alpha, const float* x, const int x_inc, float* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Spr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_dspr(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* x, const int x_inc, double* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Spr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast 
returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // SYR2 void cblas_ssyr2(const Layout layout, const Triangle triangle, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Syr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dsyr2(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* a, const int a_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto a_size = n; auto a_buffer = Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = Syr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw 
std::runtime_error("CLBlast returned with error code " + ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // SPR2 void cblas_sspr2(const Layout layout, const Triangle triangle, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Spr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_dspr2(const Layout layout, const Triangle triangle, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* ap) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n; auto x_buffer = Buffer(context, x_size); const auto y_size = n; auto y_buffer = Buffer(context, y_size); const auto ap_size = n; auto ap_buffer = Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = Spr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != StatusCode::kSuccess) { throw 
std::runtime_error("CLBlast returned with error code " + ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // ================================================================================================= // BLAS level-3 (matrix-matrix) routines // ================================================================================================= // GEMM void cblas_sgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = 
Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_cgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zgemm(const Layout layout, const 
Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYMM void cblas_ssymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = 
Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = 
Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HEMM void cblas_chemm(const Layout layout, const Side side, const Triangle triangle, const int m, 
const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Hemm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zhemm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); 
auto s = Hemm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYRK void cblas_ssyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const float alpha, const float* a, const int a_ld, const float beta, float* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const double alpha, const double* a, const int a_ld, const double beta, double* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syrk(static_cast(layout), static_cast(triangle), 
static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, 
reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HERK void cblas_cherk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Herk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zherk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto 
queue_cl = queue(); auto s = Herk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYR2K void cblas_ssyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer 
= Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* 
alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HER2K void cblas_cher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Her2k(static_cast(layout), 
static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = beta; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); const auto c_size = n; auto c_buffer = Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = Her2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // TRMM void cblas_strmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, 
b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_dtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ctrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; 
auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ztrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // TRSM void cblas_strsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = 
Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_dtrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ctrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto 
a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ztrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // ================================================================================================= // Extra non-BLAS routines (level-X) // ================================================================================================= // OMATCOPY void cblas_somatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, 
const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_domatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_comatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto 
a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_zomatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { auto device = get_device(); auto context = Context(device); auto queue = Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = n; auto a_buffer = Buffer(context, a_size); const auto b_size = n; auto b_buffer = Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // ================================================================================================= } // namespace clblast