// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file contains the Netlib CBLAS implementations to the CLBlast BLAS routines, performing buffer // copies automatically and running on the default OpenCL platform and device. For full control over // performance, it is advised to use the regular clblast.h or clblast_c.h headers instead. // // ================================================================================================= #include #include "clblast_netlib_c.h" #include "clblast.h" #include "utilities/utilities.hpp" // Shortcuts to the clblast namespace using float2 = clblast::float2; using double2 = clblast::double2; // Option to make OpenCL device and context static to avoid re-creation upon multiple calls to the // Netlib API. Disadvantage is that they are not cleaned-up until program termination. #ifdef NETLIB_PERSISTENT_OPENCL #define OPTIONAL_STATIC static #else #define OPTIONAL_STATIC #endif // Helper function to get a default OpenCL platform and device clblast::Device get_device() { auto platform_id = clblast::ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}); auto device_id = clblast::ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}); auto platform = clblast::Platform(platform_id); return clblast::Device(platform, device_id); } // ================================================================================================= // BLAS level-1 (vector-vector) routines // ================================================================================================= // ROTG void cblas_srotg(float* sa, float* sb, float* sc, float* ss) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto sa_size = 1; const auto sb_size = 1; const auto sc_size = 1; const auto ss_size = 1; auto sa_buffer = clblast::Buffer(context, sa_size); auto sb_buffer = clblast::Buffer(context, sb_size); auto sc_buffer = clblast::Buffer(context, sc_size); auto ss_buffer = clblast::Buffer(context, ss_size); sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); auto queue_cl = queue(); auto s = clblast::Rotg(sa_buffer(), 0, sb_buffer(), 0, sc_buffer(), 0, ss_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); } void cblas_drotg(double* sa, double* sb, double* sc, double* ss) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto sa_size = 1; const auto sb_size = 1; const auto sc_size = 1; const auto ss_size = 1; auto sa_buffer = clblast::Buffer(context, sa_size); auto sb_buffer = clblast::Buffer(context, sb_size); auto sc_buffer = clblast::Buffer(context, sc_size); auto ss_buffer = clblast::Buffer(context, ss_size); sa_buffer.Write(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Write(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Write(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Write(queue, ss_size, reinterpret_cast(ss)); auto queue_cl = queue(); auto s = clblast::Rotg(sa_buffer(), 0, sb_buffer(), 0, sc_buffer(), 0, ss_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } sa_buffer.Read(queue, sa_size, reinterpret_cast(sa)); sb_buffer.Read(queue, sb_size, reinterpret_cast(sb)); sc_buffer.Read(queue, sc_size, reinterpret_cast(sc)); ss_buffer.Read(queue, ss_size, reinterpret_cast(ss)); } // ROTMG void cblas_srotmg(float* sd1, float* sd2, float* sx1, const float sy1, float* sparam) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto sy1_size = 1; const auto sd1_size = 1; const auto sd2_size = 1; const auto sx1_size = 1; const auto sparam_size = 1; auto sy1_buffer = clblast::Buffer(context, sy1_size); float sy1_vec[1]; sy1_vec[0] = sy1; auto sd1_buffer = clblast::Buffer(context, sd1_size); auto sd2_buffer = clblast::Buffer(context, sd2_size); auto sx1_buffer = clblast::Buffer(context, sx1_size); auto sparam_buffer = clblast::Buffer(context, sparam_size); sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1_vec)); sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = clblast::Rotmg(sd1_buffer(), 0, sd2_buffer(), 0, sx1_buffer(), 0, sy1_buffer(), 0, sparam_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } void cblas_drotmg(double* sd1, double* sd2, double* sx1, const double sy1, double* sparam) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto sy1_size = 1; const auto sd1_size = 1; const auto sd2_size = 1; const auto sx1_size = 1; const auto sparam_size = 1; auto sy1_buffer = clblast::Buffer(context, sy1_size); double sy1_vec[1]; sy1_vec[0] = sy1; auto sd1_buffer = clblast::Buffer(context, sd1_size); auto sd2_buffer = clblast::Buffer(context, sd2_size); auto sx1_buffer = clblast::Buffer(context, sx1_size); auto sparam_buffer = clblast::Buffer(context, sparam_size); sy1_buffer.Write(queue, sy1_size, reinterpret_cast(sy1_vec)); sd1_buffer.Write(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Write(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Write(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = clblast::Rotmg(sd1_buffer(), 0, sd2_buffer(), 0, sx1_buffer(), 0, sy1_buffer(), 0, sparam_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } sd1_buffer.Read(queue, sd1_size, reinterpret_cast(sd1)); sd2_buffer.Read(queue, sd2_size, reinterpret_cast(sd2)); sx1_buffer.Read(queue, sx1_size, reinterpret_cast(sx1)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } // ROT void cblas_srot(const int n, float* x, const int x_inc, float* y, const int y_inc, const float cos, const float sin) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Rot(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, cos, sin, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_drot(const int n, double* x, const int x_inc, double* y, const int y_inc, const double cos, const double sin) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Rot(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, cos, sin, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // ROTM void cblas_srotm(const int n, float* x, const int x_inc, float* y, const int y_inc, float* sparam) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto sparam_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto sparam_buffer = clblast::Buffer(context, sparam_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = clblast::Rotm(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, sparam_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } void cblas_drotm(const int n, double* x, const int x_inc, double* y, const int y_inc, double* sparam) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto sparam_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto sparam_buffer = clblast::Buffer(context, sparam_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); sparam_buffer.Write(queue, sparam_size, reinterpret_cast(sparam)); auto queue_cl = queue(); auto s = clblast::Rotm(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, sparam_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); sparam_buffer.Read(queue, sparam_size, reinterpret_cast(sparam)); } // SWAP void cblas_sswap(const int n, float* x, const int x_inc, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dswap(const int n, double* x, const int x_inc, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cswap(const int n, void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zswap(const int n, void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Swap(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SCAL void cblas_sscal(const int n, const float alpha, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; auto x_buffer = clblast::Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dscal(const int n, const double alpha, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; auto x_buffer = clblast::Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_cscal(const int n, const void* alpha, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; auto x_buffer = clblast::Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_zscal(const int n, const void* alpha, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; auto x_buffer = clblast::Buffer(context, x_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Scal(n, alpha_cpp, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // COPY void cblas_scopy(const int n, const float* x, const int x_inc, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dcopy(const int n, const double* x, const int x_inc, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_ccopy(const int n, const void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zcopy(const int n, const void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Copy(n, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // AXPY void cblas_saxpy(const int n, const float alpha, const float* x, const int x_inc, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_daxpy(const int n, const double alpha, const double* x, const int x_inc, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_caxpy(const int n, const void* alpha, const void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zaxpy(const int n, const void* alpha, const void* x, const int x_inc, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Axpy(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // DOT float cblas_sdot(const int n, const float* x, const int x_inc, const float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dot(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float dot[dot_size]; dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); return dot[0]; } double cblas_ddot(const int n, const double* x, const int x_inc, const double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dot(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double dot[dot_size]; dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); return dot[0]; } // DOTU void cblas_cdotu_sub(const int n, const void* x, const int x_inc, const void* y, const int y_inc, void* dot) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dotu(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } void cblas_zdotu_sub(const int n, const void* x, const int x_inc, const void* y, const int y_inc, void* dot) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dotu(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } // DOTC void cblas_cdotc_sub(const int n, const void* x, const int x_inc, const void* y, const int y_inc, void* dot) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dotc(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } void cblas_zdotc_sub(const int n, const void* x, const int x_inc, const void* y, const int y_inc, void* dot) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto dot_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto dot_buffer = clblast::Buffer(context, dot_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Dotc(n, dot_buffer(), 0, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } dot_buffer.Read(queue, dot_size, reinterpret_cast(dot)); } // NRM2 float cblas_snrm2(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto nrm2_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto nrm2_buffer = clblast::Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float nrm2[nrm2_size]; nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); return nrm2[0]; } double cblas_dnrm2(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto nrm2_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto nrm2_buffer = clblast::Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double nrm2[nrm2_size]; nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); return nrm2[0]; } float cblas_scnrm2(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto nrm2_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto nrm2_buffer = clblast::Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float2 nrm2[nrm2_size]; nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); return nrm2[0].real(); } double cblas_dznrm2(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto nrm2_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto nrm2_buffer = clblast::Buffer(context, nrm2_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Nrm2(n, nrm2_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double2 nrm2[nrm2_size]; nrm2_buffer.Read(queue, nrm2_size, reinterpret_cast(nrm2)); return nrm2[0].real(); } // ASUM float cblas_sasum(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto asum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto asum_buffer = clblast::Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float asum[asum_size]; asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); return asum[0]; } double cblas_dasum(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto asum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto asum_buffer = clblast::Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double asum[asum_size]; asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); return asum[0]; } float cblas_scasum(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto asum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto asum_buffer = clblast::Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float2 asum[asum_size]; asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); return asum[0].real(); } double cblas_dzasum(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto asum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto asum_buffer = clblast::Buffer(context, asum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Asum(n, asum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double2 asum[asum_size]; asum_buffer.Read(queue, asum_size, reinterpret_cast(asum)); return asum[0].real(); } // SUM float cblas_ssum(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto sum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto sum_buffer = clblast::Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float sum[sum_size]; sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); return sum[0]; } double cblas_dsum(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto sum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto sum_buffer = clblast::Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double sum[sum_size]; sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); return sum[0]; } float cblas_scsum(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto sum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto sum_buffer = clblast::Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } float2 sum[sum_size]; sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); return sum[0].real(); } double cblas_dzsum(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto sum_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto sum_buffer = clblast::Buffer(context, sum_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Sum(n, sum_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } double2 sum[sum_size]; sum_buffer.Read(queue, sum_size, reinterpret_cast(sum)); return sum[0].real(); } // AMAX int cblas_isamax(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_idamax(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_icamax(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_izamax(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amax(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } // AMIN int cblas_isamin(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amin(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_idamin(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amin(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_icamin(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amin(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_izamin(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Amin(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } // MAX int cblas_ismax(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_idmax(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_icmax(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } int cblas_izmax(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imax_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imax_buffer = clblast::Buffer(context, imax_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Max(n, imax_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imax[imax_size]; imax_buffer.Read(queue, imax_size, reinterpret_cast(imax)); return imax[0]; } // MIN int cblas_ismin(const int n, const float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_idmin(const int n, const double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_icmin(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } int cblas_izmin(const int n, const void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto x_size = n * x_inc; const auto imin_size = 1; auto x_buffer = clblast::Buffer(context, x_size); auto imin_buffer = clblast::Buffer(context, imin_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Min(n, imin_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } int imin[imin_size]; imin_buffer.Read(queue, imin_size, reinterpret_cast(imin)); return imin[0]; } // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= // GEMV void cblas_sgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gemv(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // GBMV void cblas_sgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const int kl, const int ku, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const int kl, const int ku, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_cgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto x_size = (a_transpose != CLBlastTransposeNo) ? m * x_inc : n * x_inc; const auto y_size = (a_transpose != CLBlastTransposeNo) ? n * y_inc : m * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Gbmv(static_cast(layout), static_cast(a_transpose), m, n, kl, ku, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HEMV void cblas_chemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hemv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hemv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HBMV void cblas_chbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // HPMV void cblas_chpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* ap, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hpmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_zhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* ap, const void* x, const int x_inc, const void* beta, void* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Hpmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SYMV void cblas_ssymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Symv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Symv(static_cast(layout), static_cast(triangle), n, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SBMV void cblas_ssbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Sbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = n * a_ld; const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Sbmv(static_cast(layout), static_cast(triangle), n, k, alpha_cpp, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // SPMV void cblas_sspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* ap, const float* x, const int x_inc, const float beta, float* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Spmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } void cblas_dspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* ap, const double* x, const int x_inc, const double beta, double* y, const int y_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; const auto y_size = n * y_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); auto queue_cl = queue(); auto s = clblast::Spmv(static_cast(layout), static_cast(triangle), n, alpha_cpp, ap_buffer(), 0, x_buffer(), 0, x_inc, beta_cpp, y_buffer(), 0, y_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } y_buffer.Read(queue, y_size, reinterpret_cast(y)); } // TRMV void cblas_strmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const float* a, const int a_ld, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const double* a, const int a_ld, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TBMV void cblas_stbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const float* a, const int a_ld, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const double* a, const int a_ld, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TPMV void cblas_stpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const float* ap, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const double* ap, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpmv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TRSV void cblas_strsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const float* a, const int a_ld, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const double* a, const int a_ld, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Trsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TBSV void cblas_stbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const float* a, const int a_ld, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const double* a, const int a_ld, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztbsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const int k, const void* a, const int a_ld, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto a_size = n * a_ld; const auto x_size = n * x_inc; auto a_buffer = clblast::Buffer(context, a_size); auto x_buffer = clblast::Buffer(context, x_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tbsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, k, a_buffer(), 0, a_ld, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // TPSV void cblas_stpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const float* ap, float* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_dtpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const double* ap, double* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ctpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } void cblas_ztpsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int n, const void* ap, void* x, const int x_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto ap_size = ((n*(n+1)) / 2); const auto x_size = n * x_inc; auto ap_buffer = clblast::Buffer(context, ap_size); auto x_buffer = clblast::Buffer(context, x_size); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); x_buffer.Write(queue, x_size, reinterpret_cast(x)); auto queue_cl = queue(); auto s = clblast::Tpsv(static_cast(layout), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), n, ap_buffer(), 0, x_buffer(), 0, x_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } x_buffer.Read(queue, x_size, reinterpret_cast(x)); } // GER void cblas_sger(const CLBlastLayout layout, const int m, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Ger(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dger(const CLBlastLayout layout, const int m, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Ger(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // GERU void cblas_cgeru(const CLBlastLayout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Geru(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zgeru(const CLBlastLayout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Geru(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // GERC void cblas_cgerc(const CLBlastLayout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Gerc(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zgerc(const CLBlastLayout layout, const int m, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = m * x_inc; const auto y_size = n * y_inc; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Gerc(static_cast(layout), m, n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HER void cblas_cher(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const void* x, const int x_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Her(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zher(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const void* x, const int x_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Her(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HPR void cblas_chpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const void* x, const int x_inc, void* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Hpr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_zhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const void* x, const int x_inc, void* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Hpr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // HER2 void cblas_cher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Her2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_zher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Her2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // HPR2 void cblas_chpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Hpr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_zhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, void* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Hpr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // SYR void cblas_ssyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* x, const int x_inc, float* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Syr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* x, const int x_inc, double* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Syr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // SPR void cblas_sspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* x, const int x_inc, float* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Spr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_dspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* x, const int x_inc, double* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Spr(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // SYR2 void cblas_ssyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Syr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } void cblas_dsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* a, const int a_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto a_size = n * a_ld; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto a_buffer = clblast::Buffer(context, a_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); a_buffer.Write(queue, a_size, reinterpret_cast(a)); auto queue_cl = queue(); auto s = clblast::Syr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, a_buffer(), 0, a_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } a_buffer.Read(queue, a_size, reinterpret_cast(a)); } // SPR2 void cblas_sspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, float* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Spr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } void cblas_dspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, double* ap) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto ap_size = ((n*(n+1)) / 2); auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto ap_buffer = clblast::Buffer(context, ap_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); ap_buffer.Write(queue, ap_size, reinterpret_cast(ap)); auto queue_cl = queue(); auto s = clblast::Spr2(static_cast(layout), static_cast(triangle), n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, ap_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } ap_buffer.Read(queue, ap_size, reinterpret_cast(ap)); } // ================================================================================================= // BLAS level-3 (matrix-matrix) routines // ================================================================================================= // GEMM void cblas_sgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const int m, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const int m, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_cgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const int m, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const int m, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? m * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && b_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && b_transpose == CLBlastTransposeNo)) ? k * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Gemm(static_cast(layout), static_cast(a_transpose), static_cast(b_transpose), m, n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYMM void cblas_ssymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Symm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HEMM void cblas_chemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Hemm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const int m, const int n, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : ((side == CLBlastSideLeft) ? m : n) * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? ((side == CLBlastSideLeft) ? m : n) * b_ld : n * b_ld; const auto c_size = (layout == CLBlastLayoutRowMajor) ? m * c_ld : n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Hemm(static_cast(layout), static_cast(side), static_cast(triangle), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYRK void cblas_ssyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const float alpha, const float* a, const int a_ld, const float beta, float* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const double alpha, const double* a, const int a_ld, const double beta, double* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syrk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HERK void cblas_cherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const float alpha, const void* a, const int a_ld, const float beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Herk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const int n, const int k, const double alpha, const void* a, const int a_ld, const double beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Herk(static_cast(layout), static_cast(triangle), static_cast(a_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // SYR2K void cblas_ssyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const float alpha, const float* a, const int a_ld, const float* b, const int b_ld, const float beta, float* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_dsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const double alpha, const double* a, const int a_ld, const double* b, const int b_ld, const double beta, double* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_csyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const void* beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Syr2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // HER2K void cblas_cher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const float beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Her2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } void cblas_zher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const int n, const int k, const void* alpha, const void* a, const int a_ld, const void* b, const int b_ld, const double beta, void* c, const int c_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = beta; const auto a_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * a_ld : k * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && ab_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && ab_transpose == CLBlastTransposeNo)) ? n * b_ld : k * b_ld; const auto c_size = n * c_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); auto c_buffer = clblast::Buffer(context, c_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); c_buffer.Write(queue, c_size, reinterpret_cast(c)); auto queue_cl = queue(); auto s = clblast::Her2k(static_cast(layout), static_cast(triangle), static_cast(ab_transpose), n, k, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, beta_cpp, c_buffer(), 0, c_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } c_buffer.Read(queue, c_size, reinterpret_cast(c)); } // TRMM void cblas_strmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_dtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ctrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ztrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trmm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // TRSM void cblas_strsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_dtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ctrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (side == CLBlastSideLeft) ? m * a_ld : n * a_ld; const auto b_size = (layout == CLBlastLayoutRowMajor) ? m * b_ld : n * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Trsm(static_cast(layout), static_cast(side), static_cast(triangle), static_cast(a_transpose), static_cast(diagonal), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // ================================================================================================= // Extra non-BLAS routines (level-X) // ================================================================================================= // HAD void cblas_shad(const int n, const float alpha, const float* x, const int x_inc, const float* y, const int y_inc, const float beta, float* z, const int z_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto z_size = n * z_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto z_buffer = clblast::Buffer(context, z_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); z_buffer.Write(queue, z_size, reinterpret_cast(z)); auto queue_cl = queue(); auto s = clblast::Had(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, beta_cpp, z_buffer(), 0, z_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } z_buffer.Read(queue, z_size, reinterpret_cast(z)); } void cblas_dhad(const int n, const double alpha, const double* x, const int x_inc, const double* y, const int y_inc, const double beta, double* z, const int z_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto beta_cpp = beta; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto z_size = n * z_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto z_buffer = clblast::Buffer(context, z_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); z_buffer.Write(queue, z_size, reinterpret_cast(z)); auto queue_cl = queue(); auto s = clblast::Had(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, beta_cpp, z_buffer(), 0, z_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } z_buffer.Read(queue, z_size, reinterpret_cast(z)); } void cblas_chad(const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, const void* beta, void* z, const int z_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = float2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto z_size = n * z_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto z_buffer = clblast::Buffer(context, z_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); z_buffer.Write(queue, z_size, reinterpret_cast(z)); auto queue_cl = queue(); auto s = clblast::Had(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, beta_cpp, z_buffer(), 0, z_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } z_buffer.Read(queue, z_size, reinterpret_cast(z)); } void cblas_zhad(const int n, const void* alpha, const void* x, const int x_inc, const void* y, const int y_inc, const void* beta, void* z, const int z_inc) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto beta_cpp = double2{reinterpret_cast(beta)[0], reinterpret_cast(beta)[1]}; const auto x_size = n * x_inc; const auto y_size = n * y_inc; const auto z_size = n * z_inc; auto x_buffer = clblast::Buffer(context, x_size); auto y_buffer = clblast::Buffer(context, y_size); auto z_buffer = clblast::Buffer(context, z_size); x_buffer.Write(queue, x_size, reinterpret_cast(x)); y_buffer.Write(queue, y_size, reinterpret_cast(y)); z_buffer.Write(queue, z_size, reinterpret_cast(z)); auto queue_cl = queue(); auto s = clblast::Had(n, alpha_cpp, x_buffer(), 0, x_inc, y_buffer(), 0, y_inc, beta_cpp, z_buffer(), 0, z_inc, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } z_buffer.Read(queue, z_size, reinterpret_cast(z)); } // OMATCOPY void cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const float alpha, const float* a, const int a_ld, float* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_domatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const double alpha, const double* a, const int a_ld, double* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = alpha; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_comatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = float2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } void cblas_zomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, const void* alpha, const void* a, const int a_ld, void* b, const int b_ld) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto alpha_cpp = double2{reinterpret_cast(alpha)[0], reinterpret_cast(alpha)[1]}; const auto a_size = (layout == CLBlastLayoutRowMajor) ? m * a_ld : n * a_ld; const auto b_size = ((layout == CLBlastLayoutColMajor && a_transpose != CLBlastTransposeNo) || (layout == CLBlastLayoutRowMajor && a_transpose == CLBlastTransposeNo)) ? n * b_ld : m * b_ld; auto a_buffer = clblast::Buffer(context, a_size); auto b_buffer = clblast::Buffer(context, b_size); a_buffer.Write(queue, a_size, reinterpret_cast(a)); b_buffer.Write(queue, b_size, reinterpret_cast(b)); auto queue_cl = queue(); auto s = clblast::Omatcopy(static_cast(layout), static_cast(a_transpose), m, n, alpha_cpp, a_buffer(), 0, a_ld, b_buffer(), 0, b_ld, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } b_buffer.Read(queue, b_size, reinterpret_cast(b)); } // IM2COL void cblas_sim2col(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const float* im, float* col) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto im_size = height * width * channels; const auto col_size = height * width * channels; auto im_buffer = clblast::Buffer(context, im_size); auto col_buffer = clblast::Buffer(context, col_size); im_buffer.Write(queue, im_size, reinterpret_cast(im)); col_buffer.Write(queue, col_size, reinterpret_cast(col)); auto queue_cl = queue(); auto s = clblast::Im2col(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, im_buffer(), 0, col_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } col_buffer.Read(queue, col_size, reinterpret_cast(col)); } void cblas_dim2col(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const double* im, double* col) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto im_size = height * width * channels; const auto col_size = height * width * channels; auto im_buffer = clblast::Buffer(context, im_size); auto col_buffer = clblast::Buffer(context, col_size); im_buffer.Write(queue, im_size, reinterpret_cast(im)); col_buffer.Write(queue, col_size, reinterpret_cast(col)); auto queue_cl = queue(); auto s = clblast::Im2col(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, im_buffer(), 0, col_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } col_buffer.Read(queue, col_size, reinterpret_cast(col)); } void cblas_cim2col(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const void* im, void* col) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto im_size = height * width * channels; const auto col_size = height * width * channels; auto im_buffer = clblast::Buffer(context, im_size); auto col_buffer = clblast::Buffer(context, col_size); im_buffer.Write(queue, im_size, reinterpret_cast(im)); col_buffer.Write(queue, col_size, reinterpret_cast(col)); auto queue_cl = queue(); auto s = clblast::Im2col(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, im_buffer(), 0, col_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } col_buffer.Read(queue, col_size, reinterpret_cast(col)); } void cblas_zim2col(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const void* im, void* col) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto im_size = height * width * channels; const auto col_size = height * width * channels; auto im_buffer = clblast::Buffer(context, im_size); auto col_buffer = clblast::Buffer(context, col_size); im_buffer.Write(queue, im_size, reinterpret_cast(im)); col_buffer.Write(queue, col_size, reinterpret_cast(col)); auto queue_cl = queue(); auto s = clblast::Im2col(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, im_buffer(), 0, col_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } col_buffer.Read(queue, col_size, reinterpret_cast(col)); } // COL2IM void cblas_scol2im(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const float* col, float* im) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto col_size = height * width * channels; const auto im_size = height * width * channels; auto col_buffer = clblast::Buffer(context, col_size); auto im_buffer = clblast::Buffer(context, im_size); col_buffer.Write(queue, col_size, reinterpret_cast(col)); im_buffer.Write(queue, im_size, reinterpret_cast(im)); auto queue_cl = queue(); auto s = clblast::Col2im(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, col_buffer(), 0, im_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } im_buffer.Read(queue, im_size, reinterpret_cast(im)); } void cblas_dcol2im(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const double* col, double* im) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto col_size = height * width * channels; const auto im_size = height * width * channels; auto col_buffer = clblast::Buffer(context, col_size); auto im_buffer = clblast::Buffer(context, im_size); col_buffer.Write(queue, col_size, reinterpret_cast(col)); im_buffer.Write(queue, im_size, reinterpret_cast(im)); auto queue_cl = queue(); auto s = clblast::Col2im(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, col_buffer(), 0, im_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } im_buffer.Read(queue, im_size, reinterpret_cast(im)); } void cblas_ccol2im(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const void* col, void* im) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto col_size = height * width * channels; const auto im_size = height * width * channels; auto col_buffer = clblast::Buffer(context, col_size); auto im_buffer = clblast::Buffer(context, im_size); col_buffer.Write(queue, col_size, reinterpret_cast(col)); im_buffer.Write(queue, im_size, reinterpret_cast(im)); auto queue_cl = queue(); auto s = clblast::Col2im(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, col_buffer(), 0, im_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } im_buffer.Read(queue, im_size, reinterpret_cast(im)); } void cblas_zcol2im(const CLBlastKernelMode kernel_mode, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, const void* col, void* im) { OPTIONAL_STATIC auto device = get_device(); OPTIONAL_STATIC auto context = clblast::Context(device); auto queue = clblast::Queue(context, device); const auto col_size = height * width * channels; const auto im_size = height * width * channels; auto col_buffer = clblast::Buffer(context, col_size); auto im_buffer = clblast::Buffer(context, im_size); col_buffer.Write(queue, col_size, reinterpret_cast(col)); im_buffer.Write(queue, im_size, reinterpret_cast(im)); auto queue_cl = queue(); auto s = clblast::Col2im(static_cast(kernel_mode), channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, col_buffer(), 0, im_buffer(), 0, &queue_cl); if (s != clblast::StatusCode::kSuccess) { throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); } im_buffer.Read(queue, im_size, reinterpret_cast(im)); } // =================================================================================================