From 8ae8ab06a2b6f24faa0de5d390a5ae272aa94c23 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Tue, 25 Oct 2016 20:33:10 +0200 Subject: Renamed the include and source files of the Netlib CBLAS API --- src/clblast_netlib_c.cpp | 4650 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 4650 insertions(+) create mode 100644 src/clblast_netlib_c.cpp (limited to 'src/clblast_netlib_c.cpp') diff --git a/src/clblast_netlib_c.cpp b/src/clblast_netlib_c.cpp new file mode 100644 index 00000000..203a3423 --- /dev/null +++ b/src/clblast_netlib_c.cpp @@ -0,0 +1,4650 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains the Netlib CBLAS implementations to the CLBlast BLAS routines, performing buffer +// copies automatically and running on the default OpenCL platform and device. For full control over +// performance, it is advised to use the regular clblast.h or clblast_c.h headers instead. +// +// ================================================================================================= + +#include + +#include "clblast_netlib_c.h" +#include "clblast.h" +#include "utilities/utilities.hpp" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Helper function to get a default OpenCL platform and device +clblast::Device get_device() { + auto platform_id = clblast::ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}); + auto device_id = clblast::ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}); + auto platform = clblast::Platform(platform_id); + return clblast::Device(platform, device_id); +} + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines +// ================================================================================================= + +// ROTG +void cblas_srotg(float* sa, + float* sb, + float* sc, + float* ss) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto sa_size = 1; + const auto sb_size = 1; + const auto sc_size = 1; + const auto ss_size = 1; + auto sa_buffer = clblast::Buffer(context, sa_size); + auto sb_buffer = clblast::Buffer(context, sb_size); + auto sc_buffer = clblast::Buffer(context, sc_size); + auto ss_buffer = clblast::Buffer(context, ss_size); + sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); + sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); + sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); + ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); + auto queue_cl = queue(); + auto s = clblast::Rotg(sa_buffer(), 0, + sb_buffer(), 0, + sc_buffer(), 0, + ss_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); + sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); + sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); + ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); +} +void cblas_drotg(double* sa, + double* sb, + double* sc, + double* ss) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto sa_size = 1; + const auto sb_size = 1; + const auto sc_size = 1; + const auto ss_size = 1; + auto sa_buffer = clblast::Buffer(context, sa_size); + auto sb_buffer = clblast::Buffer(context, sb_size); + auto sc_buffer = clblast::Buffer(context, sc_size); + auto ss_buffer = clblast::Buffer(context, ss_size); + sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); + sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); + sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); + ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); + auto queue_cl = queue(); + auto s = clblast::Rotg(sa_buffer(), 0, + sb_buffer(), 0, + sc_buffer(), 0, + ss_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); + sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); + sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); + ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); +} + +// ROTMG +void cblas_srotmg(float* sd1, + float* sd2, + float* sx1, + const float* sy1, + float* sparam) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto sy1_size = 1; + const auto sd1_size = 1; + const auto sd2_size = 1; + const auto sx1_size = 1; + const auto sparam_size = 1; + auto sy1_buffer = clblast::Buffer(context, sy1_size); + auto sd1_buffer = clblast::Buffer(context, sd1_size); + auto sd2_buffer = clblast::Buffer(context, sd2_size); + auto sx1_buffer = clblast::Buffer(context, sx1_size); + auto sparam_buffer = clblast::Buffer(context, sparam_size); + sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1)); + sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); + sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); + sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); + sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); + auto queue_cl = queue(); + auto s = clblast::Rotmg(sd1_buffer(), 0, + sd2_buffer(), 0, + sx1_buffer(), 0, + sy1_buffer(), 0, + sparam_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); + sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); + sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); + sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); +} +void cblas_drotmg(double* sd1, + double* sd2, + double* sx1, + const double* sy1, + double* sparam) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto sy1_size = 1; + const auto sd1_size = 1; + const auto sd2_size = 1; + const auto sx1_size = 1; + const auto sparam_size = 1; + auto sy1_buffer = clblast::Buffer(context, sy1_size); + auto sd1_buffer = clblast::Buffer(context, sd1_size); + auto sd2_buffer = clblast::Buffer(context, sd2_size); + auto sx1_buffer = clblast::Buffer(context, sx1_size); + auto sparam_buffer = clblast::Buffer(context, sparam_size); + sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1)); + sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); + sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); + sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); + sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); + auto queue_cl = queue(); + auto s = clblast::Rotmg(sd1_buffer(), 0, + sd2_buffer(), 0, + sx1_buffer(), 0, + sy1_buffer(), 0, + sparam_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); + sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); + sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); + sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); +} + +// ROT +void cblas_srot(const int n, + float* x, const int x_inc, + float* y, const int y_inc, + const float cos, + const float sin) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Rot(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + cos, + sin, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_drot(const int n, + double* x, const int x_inc, + double* y, const int y_inc, + const double cos, + const double sin) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Rot(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + cos, + sin, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// ROTM +void cblas_srotm(const int n, + float* x, const int x_inc, + float* y, const int y_inc, + float* sparam) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto sparam_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto sparam_buffer = clblast::Buffer(context, sparam_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); + auto queue_cl = queue(); + auto s = clblast::Rotm(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + sparam_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); + sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); +} +void cblas_drotm(const int n, + double* x, const int x_inc, + double* y, const int y_inc, + double* sparam) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto sparam_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto sparam_buffer = clblast::Buffer(context, sparam_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); + auto queue_cl = queue(); + auto s = clblast::Rotm(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + sparam_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); + sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); +} + +// SWAP +void cblas_sswap(const int n, + float* x, const int x_inc, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Swap(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dswap(const int n, + double* x, const int x_inc, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Swap(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_cswap(const int n, + void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Swap(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zswap(const int n, + void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Swap(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// SCAL +void cblas_sscal(const int n, + const float alpha, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Scal(n, + alpha_cpp, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dscal(const int n, + const double alpha, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Scal(n, + alpha_cpp, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_cscal(const int n, + const void* alpha, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Scal(n, + alpha_cpp, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_zscal(const int n, + const void* alpha, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Scal(n, + alpha_cpp, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// COPY +void cblas_scopy(const int n, + const float* x, const int x_inc, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Copy(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dcopy(const int n, + const double* x, const int x_inc, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Copy(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_ccopy(const int n, + const void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Copy(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zcopy(const int n, + const void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Copy(n, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// AXPY +void cblas_saxpy(const int n, + const float alpha, + const float* x, const int x_inc, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Axpy(n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_daxpy(const int n, + const double alpha, + const double* x, const int x_inc, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Axpy(n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_caxpy(const int n, + const void* alpha, + const void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Axpy(n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zaxpy(const int n, + const void* alpha, + const void* x, const int x_inc, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n; + const auto y_size = n; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Axpy(n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// DOT +void cblas_sdot(const int n, + float* dot, + const float* x, const int x_inc, + const float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dot(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} +void cblas_ddot(const int n, + double* dot, + const double* x, const int x_inc, + const double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dot(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} + +// DOTU +void cblas_cdotu(const int n, + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dotu(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} +void cblas_zdotu(const int n, + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dotu(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} + +// DOTC +void cblas_cdotc(const int n, + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dotc(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} +void cblas_zdotc(const int n, + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto y_size = n; + const auto dot_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto dot_buffer = clblast::Buffer(context, dot_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + dot_buffer.Write(queue, dot_size, reinterpret_cast(dot)); + auto queue_cl = queue(); + auto s = clblast::Dotc(n, + dot_buffer(), 0, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); +} + +// NRM2 +void cblas_snrm2(const int n, + float* nrm2, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto nrm2_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto nrm2_buffer = clblast::Buffer(context, nrm2_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); + auto queue_cl = queue(); + auto s = clblast::Nrm2(n, + nrm2_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); +} +void cblas_dnrm2(const int n, + double* nrm2, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto nrm2_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto nrm2_buffer = clblast::Buffer(context, nrm2_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); + auto queue_cl = queue(); + auto s = clblast::Nrm2(n, + nrm2_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); +} +void cblas_scnrm2(const int n, + void* nrm2, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto nrm2_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto nrm2_buffer = clblast::Buffer(context, nrm2_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); + auto queue_cl = queue(); + auto s = clblast::Nrm2(n, + nrm2_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); +} +void cblas_dznrm2(const int n, + void* nrm2, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto nrm2_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto nrm2_buffer = clblast::Buffer(context, nrm2_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + nrm2_buffer.Write(queue, nrm2_size, reinterpret_cast(nrm2)); + auto queue_cl = queue(); + auto s = clblast::Nrm2(n, + nrm2_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); +} + +// ASUM +void cblas_sasum(const int n, + float* asum, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto asum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto asum_buffer = clblast::Buffer(context, asum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); + auto queue_cl = queue(); + auto s = clblast::Asum(n, + asum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); +} +void cblas_dasum(const int n, + double* asum, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto asum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto asum_buffer = clblast::Buffer(context, asum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); + auto queue_cl = queue(); + auto s = clblast::Asum(n, + asum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); +} +void cblas_scasum(const int n, + void* asum, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto asum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto asum_buffer = clblast::Buffer(context, asum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); + auto queue_cl = queue(); + auto s = clblast::Asum(n, + asum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); +} +void cblas_dzasum(const int n, + void* asum, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto asum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto asum_buffer = clblast::Buffer(context, asum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + asum_buffer.Write(queue, asum_size, reinterpret_cast(asum)); + auto queue_cl = queue(); + auto s = clblast::Asum(n, + asum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); +} + +// SUM +void cblas_ssum(const int n, + float* sum, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto sum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto sum_buffer = clblast::Buffer(context, sum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); + auto queue_cl = queue(); + auto s = clblast::Sum(n, + sum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); +} +void cblas_dsum(const int n, + double* sum, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto sum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto sum_buffer = clblast::Buffer(context, sum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); + auto queue_cl = queue(); + auto s = clblast::Sum(n, + sum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); +} +void cblas_scsum(const int n, + void* sum, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto sum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto sum_buffer = clblast::Buffer(context, sum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); + auto queue_cl = queue(); + auto s = clblast::Sum(n, + sum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); +} +void cblas_dzsum(const int n, + void* sum, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto sum_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto sum_buffer = clblast::Buffer(context, sum_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + sum_buffer.Write(queue, sum_size, reinterpret_cast(sum)); + auto queue_cl = queue(); + auto s = clblast::Sum(n, + sum_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); +} + +// AMAX +void cblas_isamax(const int n, + float* imax, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Amax(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_idamax(const int n, + double* imax, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Amax(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_icamax(const int n, + void* imax, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Amax(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_izamax(const int n, + void* imax, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Amax(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} + +// MAX +void cblas_ismax(const int n, + float* imax, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Max(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_idmax(const int n, + double* imax, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Max(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_icmax(const int n, + void* imax, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Max(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} +void cblas_izmax(const int n, + void* imax, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imax_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imax_buffer = clblast::Buffer(context, imax_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imax_buffer.Write(queue, imax_size, reinterpret_cast(imax)); + auto queue_cl = queue(); + auto s = clblast::Max(n, + imax_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); +} + +// MIN +void cblas_ismin(const int n, + float* imin, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imin_buffer = clblast::Buffer(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); + auto queue_cl = queue(); + auto s = clblast::Min(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); +} +void cblas_idmin(const int n, + double* imin, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imin_buffer = clblast::Buffer(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); + auto queue_cl = queue(); + auto s = clblast::Min(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); +} +void cblas_icmin(const int n, + void* imin, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imin_buffer = clblast::Buffer(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); + auto queue_cl = queue(); + auto s = clblast::Min(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); +} +void cblas_izmin(const int n, + void* imin, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer(context, x_size); + auto imin_buffer = clblast::Buffer(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + imin_buffer.Write(queue, imin_size, reinterpret_cast(imin)); + auto queue_cl = queue(); + auto s = clblast::Min(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); +} + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines +// ================================================================================================= + +// GEMV +void cblas_sgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const float alpha, + const float* a, const int a_ld, + const float* x, const int x_inc, + const float beta, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gemv(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const double alpha, + const double* a, const int a_ld, + const double* x, const int x_inc, + const double beta, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gemv(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_cgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gemv(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gemv(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// GBMV +void cblas_sgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, const int kl, const int ku, + const float alpha, + const float* a, const int a_ld, + const float* x, const int x_inc, + const float beta, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gbmv(static_cast(layout), + static_cast(a_transpose), + m, n, kl, ku, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, const int kl, const int ku, + const double alpha, + const double* a, const int a_ld, + const double* x, const int x_inc, + const double beta, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gbmv(static_cast(layout), + static_cast(a_transpose), + m, n, kl, ku, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_cgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, const int kl, const int ku, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gbmv(static_cast(layout), + static_cast(a_transpose), + m, n, kl, ku, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, const int kl, const int ku, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; + const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Gbmv(static_cast(layout), + static_cast(a_transpose), + m, n, kl, ku, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// HEMV +void cblas_chemv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hemv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hemv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// HBMV +void cblas_chbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hbmv(static_cast(layout), + static_cast(triangle), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hbmv(static_cast(layout), + static_cast(triangle), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// HPMV +void cblas_chpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* ap, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hpmv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_zhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* ap, + const void* x, const int x_inc, + const void* beta, + void* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Hpmv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// SYMV +void cblas_ssymv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* a, const int a_ld, + const float* x, const int x_inc, + const float beta, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Symv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* a, const int a_ld, + const double* x, const int x_inc, + const double beta, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Symv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// SBMV +void cblas_ssbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, const int k, + const float alpha, + const float* a, const int a_ld, + const float* x, const int x_inc, + const float beta, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Sbmv(static_cast(layout), + static_cast(triangle), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, const int k, + const double alpha, + const double* a, const int a_ld, + const double* x, const int x_inc, + const double beta, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Sbmv(static_cast(layout), + static_cast(triangle), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// SPMV +void cblas_sspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* ap, + const float* x, const int x_inc, + const float beta, + float* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Spmv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} +void cblas_dspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* ap, + const double* x, const int x_inc, + const double beta, + double* y, const int y_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + auto queue_cl = queue(); + auto s = clblast::Spmv(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + beta_cpp, + y_buffer(), 0, y_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + y_buffer.Read(queue, y_size, reinterpret_cast(y)); +} + +// TRMV +void cblas_strmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const float* a, const int a_ld, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const double* a, const int a_ld, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// TBMV +void cblas_stbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const float* a, const int a_ld, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const double* a, const int a_ld, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// TPMV +void cblas_stpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const float* ap, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const double* ap, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* ap, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* ap, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpmv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// TRSV +void cblas_strsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const float* a, const int a_ld, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const double* a, const int a_ld, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Trsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// TBSV +void cblas_stbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const float* a, const int a_ld, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const double* a, const int a_ld, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, const int k, + const void* a, const int a_ld, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto a_size = n * a_ld; + const auto x_size = n * x_inc; + auto a_buffer = clblast::Buffer(context, a_size); + auto x_buffer = clblast::Buffer(context, x_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tbsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, k, + a_buffer(), 0, a_ld, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// TPSV +void cblas_stpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const float* ap, + float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_dtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const double* ap, + double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ctpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* ap, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} +void cblas_ztpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int n, + const void* ap, + void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto ap_size = ((n*(n+1)) / 2); + const auto x_size = n * x_inc; + auto ap_buffer = clblast::Buffer(context, ap_size); + auto x_buffer = clblast::Buffer(context, x_size); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + auto queue_cl = queue(); + auto s = clblast::Tpsv(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + n, + ap_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + x_buffer.Read(queue, x_size, reinterpret_cast(x)); +} + +// GER +void cblas_sger(const CLBlastLayout layout, + const int m, const int n, + const float alpha, + const float* x, const int x_inc, + const float* y, const int y_inc, + float* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Ger(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_dger(const CLBlastLayout layout, + const int m, const int n, + const double alpha, + const double* x, const int x_inc, + const double* y, const int y_inc, + double* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Ger(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// GERU +void cblas_cgeru(const CLBlastLayout layout, + const int m, const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Geru(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_zgeru(const CLBlastLayout layout, + const int m, const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Geru(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// GERC +void cblas_cgerc(const CLBlastLayout layout, + const int m, const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Gerc(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_zgerc(const CLBlastLayout layout, + const int m, const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = m * x_inc; + const auto y_size = n * y_inc; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Gerc(static_cast(layout), + m, n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// HER +void cblas_cher(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const void* x, const int x_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Her(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_zher(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const void* x, const int x_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Her(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// HPR +void cblas_chpr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const void* x, const int x_inc, + void* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Hpr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} +void cblas_zhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const void* x, const int x_inc, + void* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Hpr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} + +// HER2 +void cblas_cher2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Her2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_zher2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Her2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// HPR2 +void cblas_chpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Hpr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} +void cblas_zhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + void* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Hpr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} + +// SYR +void cblas_ssyr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* x, const int x_inc, + float* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Syr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_dsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* x, const int x_inc, + double* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Syr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// SPR +void cblas_sspr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* x, const int x_inc, + float* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Spr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} +void cblas_dspr(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* x, const int x_inc, + double* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Spr(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} + +// SYR2 +void cblas_ssyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* x, const int x_inc, + const float* y, const int y_inc, + float* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Syr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} +void cblas_dsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* x, const int x_inc, + const double* y, const int y_inc, + double* a, const int a_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto a_size = n * a_ld; + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto a_buffer = clblast::Buffer(context, a_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + auto queue_cl = queue(); + auto s = clblast::Syr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + a_buffer(), 0, a_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + a_buffer.Read(queue, a_size, reinterpret_cast(a)); +} + +// SPR2 +void cblas_sspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const float alpha, + const float* x, const int x_inc, + const float* y, const int y_inc, + float* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Spr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} +void cblas_dspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, + const int n, + const double alpha, + const double* x, const int x_inc, + const double* y, const int y_inc, + double* ap) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto x_size = n * x_inc; + const auto y_size = n * y_inc; + const auto ap_size = ((n*(n+1)) / 2); + auto x_buffer = clblast::Buffer(context, x_size); + auto y_buffer = clblast::Buffer(context, y_size); + auto ap_buffer = clblast::Buffer(context, ap_size); + x_buffer.Write(queue, x_size, reinterpret_cast(x)); + y_buffer.Write(queue, y_size, reinterpret_cast(y)); + ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); + auto queue_cl = queue(); + auto s = clblast::Spr2(static_cast(layout), + static_cast(triangle), + n, + alpha_cpp, + x_buffer(), 0, x_inc, + y_buffer(), 0, y_inc, + ap_buffer(), 0, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); +} + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines +// ================================================================================================= + +// GEMM +void cblas_sgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const int m, const int n, const int k, + const float alpha, + const float* a, const int a_ld, + const float* b, const int b_ld, + const float beta, + float* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Gemm(static_cast(layout), + static_cast(a_transpose), + static_cast(b_transpose), + m, n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_dgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const int m, const int n, const int k, + const double alpha, + const double* a, const int a_ld, + const double* b, const int b_ld, + const double beta, + double* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Gemm(static_cast(layout), + static_cast(a_transpose), + static_cast(b_transpose), + m, n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_cgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const int m, const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Gemm(static_cast(layout), + static_cast(a_transpose), + static_cast(b_transpose), + m, n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, + const int m, const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Gemm(static_cast(layout), + static_cast(a_transpose), + static_cast(b_transpose), + m, n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// SYMM +void cblas_ssymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const float alpha, + const float* a, const int a_ld, + const float* b, const int b_ld, + const float beta, + float* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Symm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_dsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const double alpha, + const double* a, const int a_ld, + const double* b, const int b_ld, + const double beta, + double* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Symm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_csymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Symm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Symm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// HEMM +void cblas_chemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Hemm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; + const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Hemm(static_cast(layout), + static_cast(side), + static_cast(triangle), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// SYRK +void cblas_ssyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const float alpha, + const float* a, const int a_ld, + const float beta, + float* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syrk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_dsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const double alpha, + const double* a, const int a_ld, + const double beta, + double* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syrk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_csyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syrk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syrk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// HERK +void cblas_cherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const float alpha, + const void* a, const int a_ld, + const float beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Herk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, + const int n, const int k, + const double alpha, + const void* a, const int a_ld, + const double beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Herk(static_cast(layout), + static_cast(triangle), + static_cast(a_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// SYR2K +void cblas_ssyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const float alpha, + const float* a, const int a_ld, + const float* b, const int b_ld, + const float beta, + float* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syr2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_dsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const double alpha, + const double* a, const int a_ld, + const double* b, const int b_ld, + const double beta, + double* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syr2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_csyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syr2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Syr2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// HER2K +void cblas_cher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const float beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Her2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} +void cblas_zher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, + const int n, const int k, + const void* alpha, + const void* a, const int a_ld, + const void* b, const int b_ld, + const double beta, + void* c, const int c_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto beta_cpp = beta; + const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; + const auto c_size = n * c_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + auto c_buffer = clblast::Buffer(context, c_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + c_buffer.Write(queue, c_size, reinterpret_cast(c)); + auto queue_cl = queue(); + auto s = clblast::Her2k(static_cast(layout), + static_cast(triangle), + static_cast(ab_transpose), + n, k, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + beta_cpp, + c_buffer(), 0, c_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + c_buffer.Read(queue, c_size, reinterpret_cast(c)); +} + +// TRMM +void cblas_strmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const float alpha, + const float* a, const int a_ld, + float* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trmm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_dtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const double alpha, + const double* a, const int a_ld, + double* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trmm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_ctrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trmm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_ztrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trmm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} + +// TRSM +void cblas_strsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const float alpha, + const float* a, const int a_ld, + float* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trsm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_dtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const double alpha, + const double* a, const int a_ld, + double* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trsm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_ctrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trsm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; + const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Trsm(static_cast(layout), + static_cast(side), + static_cast(triangle), + static_cast(a_transpose), + static_cast(diagonal), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} + +// ================================================================================================= +// Extra non-BLAS routines (level-X) +// ================================================================================================= + +// OMATCOPY +void cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const float alpha, + const float* a, const int a_ld, + float* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Omatcopy(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_domatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const double alpha, + const double* a, const int a_ld, + double* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = alpha; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Omatcopy(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_comatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Omatcopy(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} +void cblas_zomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, + const int m, const int n, + const void* alpha, + const void* a, const int a_ld, + void* b, const int b_ld) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; + const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; + const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; + auto a_buffer = clblast::Buffer(context, a_size); + auto b_buffer = clblast::Buffer(context, b_size); + a_buffer.Write(queue, a_size, reinterpret_cast(a)); + b_buffer.Write(queue, b_size, reinterpret_cast(b)); + auto queue_cl = queue(); + auto s = clblast::Omatcopy(static_cast(layout), + static_cast(a_transpose), + m, n, + alpha_cpp, + a_buffer(), 0, a_ld, + b_buffer(), 0, b_ld, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + b_buffer.Read(queue, b_size, reinterpret_cast(b)); +} + +// ================================================================================================= -- cgit v1.2.3