From 61203453aaca4e47c05c598a673150522160ca87 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 19 Jun 2016 13:55:49 +0200 Subject: Renamed all C++ source files to .cpp to match the .hpp extension better --- src/routines/level2/xgbmv.cc | 68 ---------------- src/routines/level2/xgbmv.cpp | 68 ++++++++++++++++ src/routines/level2/xgemv.cc | 181 ------------------------------------------ src/routines/level2/xgemv.cpp | 181 ++++++++++++++++++++++++++++++++++++++++++ src/routines/level2/xger.cc | 106 ------------------------- src/routines/level2/xger.cpp | 106 +++++++++++++++++++++++++ src/routines/level2/xgerc.cc | 53 ------------- src/routines/level2/xgerc.cpp | 53 +++++++++++++ src/routines/level2/xgeru.cc | 52 ------------ src/routines/level2/xgeru.cpp | 52 ++++++++++++ src/routines/level2/xhbmv.cc | 64 --------------- src/routines/level2/xhbmv.cpp | 64 +++++++++++++++ src/routines/level2/xhemv.cc | 64 --------------- src/routines/level2/xhemv.cpp | 64 +++++++++++++++ src/routines/level2/xher.cc | 117 --------------------------- src/routines/level2/xher.cpp | 117 +++++++++++++++++++++++++++ src/routines/level2/xher2.cc | 108 ------------------------- src/routines/level2/xher2.cpp | 108 +++++++++++++++++++++++++ src/routines/level2/xhpmv.cc | 64 --------------- src/routines/level2/xhpmv.cpp | 64 +++++++++++++++ src/routines/level2/xhpr.cc | 51 ------------ src/routines/level2/xhpr.cpp | 51 ++++++++++++ src/routines/level2/xhpr2.cc | 53 ------------- src/routines/level2/xhpr2.cpp | 53 +++++++++++++ src/routines/level2/xsbmv.cc | 65 --------------- src/routines/level2/xsbmv.cpp | 65 +++++++++++++++ src/routines/level2/xspmv.cc | 65 --------------- src/routines/level2/xspmv.cpp | 65 +++++++++++++++ src/routines/level2/xspr.cc | 52 ------------ src/routines/level2/xspr.cpp | 52 ++++++++++++ src/routines/level2/xspr2.cc | 54 ------------- src/routines/level2/xspr2.cpp | 54 +++++++++++++ src/routines/level2/xsymv.cc | 65 --------------- src/routines/level2/xsymv.cpp | 65 +++++++++++++++ src/routines/level2/xsyr.cc | 51 ------------ src/routines/level2/xsyr.cpp | 51 ++++++++++++ src/routines/level2/xsyr2.cc | 53 ------------- src/routines/level2/xsyr2.cpp | 53 +++++++++++++ src/routines/level2/xtbmv.cc | 82 ------------------- src/routines/level2/xtbmv.cpp | 82 +++++++++++++++++++ src/routines/level2/xtpmv.cc | 82 ------------------- src/routines/level2/xtpmv.cpp | 82 +++++++++++++++++++ src/routines/level2/xtrmv.cc | 82 ------------------- src/routines/level2/xtrmv.cpp | 82 +++++++++++++++++++ 44 files changed, 1632 insertions(+), 1632 deletions(-) delete mode 100644 src/routines/level2/xgbmv.cc create mode 100644 src/routines/level2/xgbmv.cpp delete mode 100644 src/routines/level2/xgemv.cc create mode 100644 src/routines/level2/xgemv.cpp delete mode 100644 src/routines/level2/xger.cc create mode 100644 src/routines/level2/xger.cpp delete mode 100644 src/routines/level2/xgerc.cc create mode 100644 src/routines/level2/xgerc.cpp delete mode 100644 src/routines/level2/xgeru.cc create mode 100644 src/routines/level2/xgeru.cpp delete mode 100644 src/routines/level2/xhbmv.cc create mode 100644 src/routines/level2/xhbmv.cpp delete mode 100644 src/routines/level2/xhemv.cc create mode 100644 src/routines/level2/xhemv.cpp delete mode 100644 src/routines/level2/xher.cc create mode 100644 src/routines/level2/xher.cpp delete mode 100644 src/routines/level2/xher2.cc create mode 100644 src/routines/level2/xher2.cpp delete mode 100644 src/routines/level2/xhpmv.cc create mode 100644 src/routines/level2/xhpmv.cpp delete mode 100644 src/routines/level2/xhpr.cc create mode 100644 src/routines/level2/xhpr.cpp delete mode 100644 src/routines/level2/xhpr2.cc create mode 100644 src/routines/level2/xhpr2.cpp delete mode 100644 src/routines/level2/xsbmv.cc create mode 100644 src/routines/level2/xsbmv.cpp delete mode 100644 src/routines/level2/xspmv.cc create mode 100644 src/routines/level2/xspmv.cpp delete mode 100644 src/routines/level2/xspr.cc create mode 100644 src/routines/level2/xspr.cpp delete mode 100644 src/routines/level2/xspr2.cc create mode 100644 src/routines/level2/xspr2.cpp delete mode 100644 src/routines/level2/xsymv.cc create mode 100644 src/routines/level2/xsymv.cpp delete mode 100644 src/routines/level2/xsyr.cc create mode 100644 src/routines/level2/xsyr.cpp delete mode 100644 src/routines/level2/xsyr2.cc create mode 100644 src/routines/level2/xsyr2.cpp delete mode 100644 src/routines/level2/xtbmv.cc create mode 100644 src/routines/level2/xtbmv.cpp delete mode 100644 src/routines/level2/xtpmv.cc create mode 100644 src/routines/level2/xtpmv.cpp delete mode 100644 src/routines/level2/xtrmv.cc create mode 100644 src/routines/level2/xtrmv.cpp (limited to 'src/routines/level2') diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cc deleted file mode 100644 index ea4f001c..00000000 --- a/src/routines/level2/xgbmv.cc +++ /dev/null @@ -1,68 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xgbmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xgbmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xgbmv::Xgbmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xgbmv::DoGbmv(const Layout layout, const Transpose a_transpose, - const size_t m, const size_t n, const size_t kl, const size_t ku, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // Reverses the upper and lower band count - auto rotated = (layout == Layout::kRowMajor); - auto kl_real = (rotated) ? ku : kl; - auto ku_real = (rotated) ? kl : ku; - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific hermitian matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_GBMV define. - bool fast_kernels = false; - return MatVec(layout, a_transpose, - m, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - 0, false, kl_real, ku_real); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xgbmv; -template class Xgbmv; -template class Xgbmv; -template class Xgbmv; -template class Xgbmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xgbmv.cpp b/src/routines/level2/xgbmv.cpp new file mode 100644 index 00000000..ea4f001c --- /dev/null +++ b/src/routines/level2/xgbmv.cpp @@ -0,0 +1,68 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgbmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xgbmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgbmv::Xgbmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgbmv::DoGbmv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // Reverses the upper and lower band count + auto rotated = (layout == Layout::kRowMajor); + auto kl_real = (rotated) ? ku : kl; + auto ku_real = (rotated) ? kl : ku; + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific hermitian matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_GBMV define. + bool fast_kernels = false; + return MatVec(layout, a_transpose, + m, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + 0, false, kl_real, ku_real); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgbmv; +template class Xgbmv; +template class Xgbmv; +template class Xgbmv; +template class Xgbmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc deleted file mode 100644 index 21fb397c..00000000 --- a/src/routines/level2/xgemv.cc +++ /dev/null @@ -1,181 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xgemv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xgemv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xgemv::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue()) { - source_string_ = - #include "../../kernels/level2/xgemv.opencl" - #include "../../kernels/level2/xgemv_fast.opencl" - ; -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, - const size_t m, const size_t n, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // Performs the matrix-vector multiplication - return MatVec(layout, a_transpose, - m, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - true, true, - 0, false, 0, 0); // N/A for this routine -} - -// ================================================================================================= - -// The generic implementation, also suited for other (non general) matrix-vector multiplications -template -StatusCode Xgemv::MatVec(const Layout layout, const Transpose a_transpose, - const size_t m, const size_t n, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - bool fast_kernel, bool fast_kernel_rot, - const size_t parameter, const bool packed, - const size_t kl, const size_t ku) { - - // Makes sure all dimensions are larger than zero - if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } - - // Computes whether or not the matrix has an alternative layout (row or column-major). - auto a_altlayout = (layout == Layout::kRowMajor); - auto a_one = (a_altlayout) ? n : m; - auto a_two = (a_altlayout) ? m : n; - - // Swap m and n if the matrix is transposed - auto a_transposed = (a_transpose != Transpose::kNo); - auto m_real = (a_transposed) ? n : m; - auto n_real = (a_transposed) ? m : n; - - // Special adjustments for banded matrices - if (kl != 0 || ku != 0) { - a_one = kl+ku+1; - } - - // Determines whether the kernel needs to perform rotated access ('^' is the XOR operator) - auto a_rotated = a_transposed ^ a_altlayout; - - // In case of complex data-types, the transpose can also become a conjugate transpose - auto a_conjugate = (a_transpose == Transpose::kConjugate); - - // Tests the matrix and the vectors for validity - auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } - else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } - if (ErrorIn(status)) { return status; } - status = TestVectorX(n_real, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(m_real, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } - - // Determines whether or not the fast-version can be used - fast_kernel = fast_kernel && (a_offset == 0) && (a_rotated == 0) && (a_conjugate == 0) && - IsMultiple(m, db_["WGS2"]*db_["WPT2"]) && - IsMultiple(n, db_["WGS2"]) && - IsMultiple(a_ld, db_["VW2"]); - fast_kernel_rot = fast_kernel_rot && (a_offset == 0) && (a_rotated == 1) && (a_conjugate == 0) && - IsMultiple(m, db_["WGS3"]*db_["WPT3"]) && - IsMultiple(n, db_["WGS3"]) && - IsMultiple(a_ld, db_["VW3"]); - - // If possible, run the fast-version (rotated or non-rotated) of the kernel - auto kernel_name = "Xgemv"; - auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]); - auto global_size = m_ceiled / db_["WPT1"]; - auto local_size = db_["WGS1"]; - if (fast_kernel) { - kernel_name = "XgemvFast"; - global_size = m_real / db_["WPT2"]; - local_size = db_["WGS2"]; - } - if (fast_kernel_rot) { - kernel_name = "XgemvFastRot"; - global_size = m_real / db_["WPT3"]; - local_size = db_["WGS3"]; - } - - // Upload the scalar arguments as constant buffers to the device (needed for half-precision) - auto alpha_buffer = Buffer(context_, 1); - auto beta_buffer = Buffer(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - beta_buffer.Write(queue_, 1, &beta); - - // Retrieves the Xgemv kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); - auto kernel = Kernel(program, kernel_name); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast(m_real)); - kernel.SetArgument(1, static_cast(n_real)); - kernel.SetArgument(2, alpha_buffer()); - kernel.SetArgument(3, beta_buffer()); - kernel.SetArgument(4, static_cast(a_rotated)); - kernel.SetArgument(5, a_buffer()); - kernel.SetArgument(6, static_cast(a_offset)); - kernel.SetArgument(7, static_cast(a_ld)); - kernel.SetArgument(8, x_buffer()); - kernel.SetArgument(9, static_cast(x_offset)); - kernel.SetArgument(10, static_cast(x_inc)); - kernel.SetArgument(11, y_buffer()); - kernel.SetArgument(12, static_cast(y_offset)); - kernel.SetArgument(13, static_cast(y_inc)); - kernel.SetArgument(14, static_cast(a_conjugate)); - kernel.SetArgument(15, static_cast(parameter)); // extra parameter used for symm/herm - kernel.SetArgument(16, static_cast(kl)); // only used for banded matrices - kernel.SetArgument(17, static_cast(ku)); // only used for banded matrices - - // Launches the kernel - auto global = std::vector{global_size}; - auto local = std::vector{local_size}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xgemv; -template class Xgemv; -template class Xgemv; -template class Xgemv; -template class Xgemv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp new file mode 100644 index 00000000..21fb397c --- /dev/null +++ b/src/routines/level2/xgemv.cpp @@ -0,0 +1,181 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgemv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xgemv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgemv::Xgemv(Queue &queue, EventPointer event, const std::string &name): + Routine(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue()) { + source_string_ = + #include "../../kernels/level2/xgemv.opencl" + #include "../../kernels/level2/xgemv_fast.opencl" + ; +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // Performs the matrix-vector multiplication + return MatVec(layout, a_transpose, + m, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + true, true, + 0, false, 0, 0); // N/A for this routine +} + +// ================================================================================================= + +// The generic implementation, also suited for other (non general) matrix-vector multiplications +template +StatusCode Xgemv::MatVec(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + bool fast_kernel, bool fast_kernel_rot, + const size_t parameter, const bool packed, + const size_t kl, const size_t ku) { + + // Makes sure all dimensions are larger than zero + if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } + + // Computes whether or not the matrix has an alternative layout (row or column-major). + auto a_altlayout = (layout == Layout::kRowMajor); + auto a_one = (a_altlayout) ? n : m; + auto a_two = (a_altlayout) ? m : n; + + // Swap m and n if the matrix is transposed + auto a_transposed = (a_transpose != Transpose::kNo); + auto m_real = (a_transposed) ? n : m; + auto n_real = (a_transposed) ? m : n; + + // Special adjustments for banded matrices + if (kl != 0 || ku != 0) { + a_one = kl+ku+1; + } + + // Determines whether the kernel needs to perform rotated access ('^' is the XOR operator) + auto a_rotated = a_transposed ^ a_altlayout; + + // In case of complex data-types, the transpose can also become a conjugate transpose + auto a_conjugate = (a_transpose == Transpose::kConjugate); + + // Tests the matrix and the vectors for validity + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } + if (ErrorIn(status)) { return status; } + status = TestVectorX(n_real, x_buffer, x_offset, x_inc); + if (ErrorIn(status)) { return status; } + status = TestVectorY(m_real, y_buffer, y_offset, y_inc); + if (ErrorIn(status)) { return status; } + + // Determines whether or not the fast-version can be used + fast_kernel = fast_kernel && (a_offset == 0) && (a_rotated == 0) && (a_conjugate == 0) && + IsMultiple(m, db_["WGS2"]*db_["WPT2"]) && + IsMultiple(n, db_["WGS2"]) && + IsMultiple(a_ld, db_["VW2"]); + fast_kernel_rot = fast_kernel_rot && (a_offset == 0) && (a_rotated == 1) && (a_conjugate == 0) && + IsMultiple(m, db_["WGS3"]*db_["WPT3"]) && + IsMultiple(n, db_["WGS3"]) && + IsMultiple(a_ld, db_["VW3"]); + + // If possible, run the fast-version (rotated or non-rotated) of the kernel + auto kernel_name = "Xgemv"; + auto m_ceiled = Ceil(m_real, db_["WGS1"]*db_["WPT1"]); + auto global_size = m_ceiled / db_["WPT1"]; + auto local_size = db_["WGS1"]; + if (fast_kernel) { + kernel_name = "XgemvFast"; + global_size = m_real / db_["WPT2"]; + local_size = db_["WGS2"]; + } + if (fast_kernel_rot) { + kernel_name = "XgemvFastRot"; + global_size = m_real / db_["WPT3"]; + local_size = db_["WGS3"]; + } + + // Upload the scalar arguments as constant buffers to the device (needed for half-precision) + auto alpha_buffer = Buffer(context_, 1); + auto beta_buffer = Buffer(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + beta_buffer.Write(queue_, 1, &beta); + + // Retrieves the Xgemv kernel from the compiled binary + try { + const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); + auto kernel = Kernel(program, kernel_name); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(m_real)); + kernel.SetArgument(1, static_cast(n_real)); + kernel.SetArgument(2, alpha_buffer()); + kernel.SetArgument(3, beta_buffer()); + kernel.SetArgument(4, static_cast(a_rotated)); + kernel.SetArgument(5, a_buffer()); + kernel.SetArgument(6, static_cast(a_offset)); + kernel.SetArgument(7, static_cast(a_ld)); + kernel.SetArgument(8, x_buffer()); + kernel.SetArgument(9, static_cast(x_offset)); + kernel.SetArgument(10, static_cast(x_inc)); + kernel.SetArgument(11, y_buffer()); + kernel.SetArgument(12, static_cast(y_offset)); + kernel.SetArgument(13, static_cast(y_inc)); + kernel.SetArgument(14, static_cast(a_conjugate)); + kernel.SetArgument(15, static_cast(parameter)); // extra parameter used for symm/herm + kernel.SetArgument(16, static_cast(kl)); // only used for banded matrices + kernel.SetArgument(17, static_cast(ku)); // only used for banded matrices + + // Launches the kernel + auto global = std::vector{global_size}; + auto local = std::vector{local_size}; + status = RunKernel(kernel, queue_, device_, global, local, event_); + if (ErrorIn(status)) { return status; } + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgemv; +template class Xgemv; +template class Xgemv; +template class Xgemv; +template class Xgemv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc deleted file mode 100644 index 353047d2..00000000 --- a/src/routines/level2/xger.cc +++ /dev/null @@ -1,106 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xger class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xger.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xger::Xger(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = - #include "../../kernels/level2/level2.opencl" - #include "../../kernels/level2/xger.opencl" - ; -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xger::DoGer(const Layout layout, - const size_t m, const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - - // Makes sure all dimensions are larger than zero - if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } - - // Computes whether or not the matrix has an alternative layout (row or column-major). - const auto a_is_rowmajor = (layout == Layout::kRowMajor); - const auto a_one = (a_is_rowmajor) ? n : m; - const auto a_two = (a_is_rowmajor) ? m : n; - - // Tests the matrix and the vectors for validity - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); - if (ErrorIn(status)) { return status; } - status = TestVectorX(m, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } - - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - - // Retrieves the kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); - auto kernel = Kernel(program, "Xger"); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast(a_one)); - kernel.SetArgument(1, static_cast(a_two)); - kernel.SetArgument(2, alpha_buffer()); - kernel.SetArgument(3, x_buffer()); - kernel.SetArgument(4, static_cast(x_offset)); - kernel.SetArgument(5, static_cast(x_inc)); - kernel.SetArgument(6, y_buffer()); - kernel.SetArgument(7, static_cast(y_offset)); - kernel.SetArgument(8, static_cast(y_inc)); - kernel.SetArgument(9, a_buffer()); - kernel.SetArgument(10, static_cast(a_offset)); - kernel.SetArgument(11, static_cast(a_ld)); - kernel.SetArgument(12, static_cast(a_is_rowmajor)); - - // Launches the kernel - auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]); - auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); - auto global = std::vector{a_one_ceiled, a_two_ceiled}; - auto local = std::vector{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xger; -template class Xger; -template class Xger; -template class Xger; -template class Xger; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp new file mode 100644 index 00000000..353047d2 --- /dev/null +++ b/src/routines/level2/xger.cpp @@ -0,0 +1,106 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xger class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xger.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xger::Xger(Queue &queue, EventPointer event, const std::string &name): + Routine(queue, event, name, {"Xger"}, PrecisionValue()) { + source_string_ = + #include "../../kernels/level2/level2.opencl" + #include "../../kernels/level2/xger.opencl" + ; +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xger::DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Makes sure all dimensions are larger than zero + if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } + + // Computes whether or not the matrix has an alternative layout (row or column-major). + const auto a_is_rowmajor = (layout == Layout::kRowMajor); + const auto a_one = (a_is_rowmajor) ? n : m; + const auto a_two = (a_is_rowmajor) ? m : n; + + // Tests the matrix and the vectors for validity + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); + if (ErrorIn(status)) { return status; } + status = TestVectorX(m, x_buffer, x_offset, x_inc); + if (ErrorIn(status)) { return status; } + status = TestVectorY(n, y_buffer, y_offset, y_inc); + if (ErrorIn(status)) { return status; } + + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary + try { + const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); + auto kernel = Kernel(program, "Xger"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(a_one)); + kernel.SetArgument(1, static_cast(a_two)); + kernel.SetArgument(2, alpha_buffer()); + kernel.SetArgument(3, x_buffer()); + kernel.SetArgument(4, static_cast(x_offset)); + kernel.SetArgument(5, static_cast(x_inc)); + kernel.SetArgument(6, y_buffer()); + kernel.SetArgument(7, static_cast(y_offset)); + kernel.SetArgument(8, static_cast(y_inc)); + kernel.SetArgument(9, a_buffer()); + kernel.SetArgument(10, static_cast(a_offset)); + kernel.SetArgument(11, static_cast(a_ld)); + kernel.SetArgument(12, static_cast(a_is_rowmajor)); + + // Launches the kernel + auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]); + auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); + auto global = std::vector{a_one_ceiled, a_two_ceiled}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, queue_, device_, global, local, event_); + if (ErrorIn(status)) { return status; } + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xger; +template class Xger; +template class Xger; +template class Xger; +template class Xger; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cc deleted file mode 100644 index d9feda97..00000000 --- a/src/routines/level2/xgerc.cc +++ /dev/null @@ -1,53 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xgerc class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xgerc.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xgerc::Xgerc(Queue &queue, EventPointer event, const std::string &name): - Xger(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xgerc::DoGerc(const Layout layout, - const size_t m, const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - - // Regular Ger operation on complex data, plus conjugation in the kernel guarded by the - // ROUTINE_GERC guard. - return DoGer(layout, m, n, alpha, - x_buffer, x_offset, x_inc, - y_buffer, y_offset, y_inc, - a_buffer, a_offset, a_ld); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xgerc; -template class Xgerc; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xgerc.cpp b/src/routines/level2/xgerc.cpp new file mode 100644 index 00000000..d9feda97 --- /dev/null +++ b/src/routines/level2/xgerc.cpp @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgerc class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xgerc.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgerc::Xgerc(Queue &queue, EventPointer event, const std::string &name): + Xger(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgerc::DoGerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Regular Ger operation on complex data, plus conjugation in the kernel guarded by the + // ROUTINE_GERC guard. + return DoGer(layout, m, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgerc; +template class Xgerc; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cc deleted file mode 100644 index da9e91c2..00000000 --- a/src/routines/level2/xgeru.cc +++ /dev/null @@ -1,52 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xgeru class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xgeru.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xgeru::Xgeru(Queue &queue, EventPointer event, const std::string &name): - Xger(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xgeru::DoGeru(const Layout layout, - const size_t m, const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - - // Regular Ger operation on complex data - return DoGer(layout, m, n, alpha, - x_buffer, x_offset, x_inc, - y_buffer, y_offset, y_inc, - a_buffer, a_offset, a_ld); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xgeru; -template class Xgeru; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xgeru.cpp b/src/routines/level2/xgeru.cpp new file mode 100644 index 00000000..da9e91c2 --- /dev/null +++ b/src/routines/level2/xgeru.cpp @@ -0,0 +1,52 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xgeru class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xgeru.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xgeru::Xgeru(Queue &queue, EventPointer event, const std::string &name): + Xger(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xgeru::DoGeru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Regular Ger operation on complex data + return DoGer(layout, m, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xgeru; +template class Xgeru; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cc deleted file mode 100644 index f6c0e3c4..00000000 --- a/src/routines/level2/xhbmv.cc +++ /dev/null @@ -1,64 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xhbmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xhbmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xhbmv::Xhbmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xhbmv::DoHbmv(const Layout layout, const Triangle triangle, - const size_t n, const size_t k, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific hermitian banded matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_HBMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, false, k, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xhbmv; -template class Xhbmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xhbmv.cpp b/src/routines/level2/xhbmv.cpp new file mode 100644 index 00000000..f6c0e3c4 --- /dev/null +++ b/src/routines/level2/xhbmv.cpp @@ -0,0 +1,64 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhbmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xhbmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhbmv::Xhbmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhbmv::DoHbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific hermitian banded matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_HBMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, false, k, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhbmv; +template class Xhbmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhemv.cc b/src/routines/level2/xhemv.cc deleted file mode 100644 index 2cbcf7b4..00000000 --- a/src/routines/level2/xhemv.cc +++ /dev/null @@ -1,64 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xhemv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xhemv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xhemv::Xhemv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xhemv::DoHemv(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific hermitian matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_HEMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, false, 0, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xhemv; -template class Xhemv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xhemv.cpp b/src/routines/level2/xhemv.cpp new file mode 100644 index 00000000..2cbcf7b4 --- /dev/null +++ b/src/routines/level2/xhemv.cpp @@ -0,0 +1,64 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhemv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xhemv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhemv::Xhemv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhemv::DoHemv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific hermitian matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_HEMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, false, 0, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhemv; +template class Xhemv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc deleted file mode 100644 index ed8ba9e9..00000000 --- a/src/routines/level2/xher.cc +++ /dev/null @@ -1,117 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xher class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xher.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xher::Xher(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = - #include "../../kernels/level2/level2.opencl" - #include "../../kernels/level2/xher.opencl" - ; -} - -// ================================================================================================= - -// Specializations to compute alpha of type 'T' -template <> float2 Xher::GetAlpha(const float alpha) { return float2{alpha, 0.0f}; } -template <> double2 Xher::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } -template <> float Xher::GetAlpha(const float alpha) { return alpha; } -template <> double Xher::GetAlpha(const double alpha) { return alpha; } -template <> half Xher::GetAlpha(const half alpha) { return alpha; } - -// ================================================================================================= - -// The main routine -template -StatusCode Xher::DoHer(const Layout layout, const Triangle triangle, - const size_t n, - const U alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const bool packed) { - - // Makes sure the dimensions are larger than zero - if (n == 0) { return StatusCode::kInvalidDimension; } - - // The data is either in the upper or lower triangle - const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - const auto is_rowmajor = (layout == Layout::kRowMajor); - - // Tests the matrix and the vectors for validity - auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } - if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - - // If alpha is zero an update is not required - if (alpha == U{0}) { return StatusCode::kSuccess; } - - // Creates a matching version of alpha - const auto matching_alpha = GetAlpha(alpha); - - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer(context_, 1); - alpha_buffer.Write(queue_, 1, &matching_alpha); - - // Retrieves the kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); - auto kernel = Kernel(program, "Xher"); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast(n)); - kernel.SetArgument(1, alpha_buffer()); - kernel.SetArgument(2, x_buffer()); - kernel.SetArgument(3, static_cast(x_offset)); - kernel.SetArgument(4, static_cast(x_inc)); - kernel.SetArgument(5, a_buffer()); - kernel.SetArgument(6, static_cast(a_offset)); - kernel.SetArgument(7, static_cast(a_ld)); - kernel.SetArgument(8, static_cast(is_upper)); - kernel.SetArgument(9, static_cast(is_rowmajor)); - - // Launches the kernel - auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); - auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); - auto global = std::vector{global_one, global_two}; - auto local = std::vector{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xher; -template class Xher; -template class Xher; -template class Xher; -template class Xher; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp new file mode 100644 index 00000000..ed8ba9e9 --- /dev/null +++ b/src/routines/level2/xher.cpp @@ -0,0 +1,117 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xher.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xher::Xher(Queue &queue, EventPointer event, const std::string &name): + Routine(queue, event, name, {"Xger"}, PrecisionValue()) { + source_string_ = + #include "../../kernels/level2/level2.opencl" + #include "../../kernels/level2/xher.opencl" + ; +} + +// ================================================================================================= + +// Specializations to compute alpha of type 'T' +template <> float2 Xher::GetAlpha(const float alpha) { return float2{alpha, 0.0f}; } +template <> double2 Xher::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } +template <> float Xher::GetAlpha(const float alpha) { return alpha; } +template <> double Xher::GetAlpha(const double alpha) { return alpha; } +template <> half Xher::GetAlpha(const half alpha) { return alpha; } + +// ================================================================================================= + +// The main routine +template +StatusCode Xher::DoHer(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed) { + + // Makes sure the dimensions are larger than zero + if (n == 0) { return StatusCode::kInvalidDimension; } + + // The data is either in the upper or lower triangle + const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + const auto is_rowmajor = (layout == Layout::kRowMajor); + + // Tests the matrix and the vectors for validity + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } + if (ErrorIn(status)) { return status; } + status = TestVectorX(n, x_buffer, x_offset, x_inc); + if (ErrorIn(status)) { return status; } + + // If alpha is zero an update is not required + if (alpha == U{0}) { return StatusCode::kSuccess; } + + // Creates a matching version of alpha + const auto matching_alpha = GetAlpha(alpha); + + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer(context_, 1); + alpha_buffer.Write(queue_, 1, &matching_alpha); + + // Retrieves the kernel from the compiled binary + try { + const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); + auto kernel = Kernel(program, "Xher"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(n)); + kernel.SetArgument(1, alpha_buffer()); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, static_cast(x_offset)); + kernel.SetArgument(4, static_cast(x_inc)); + kernel.SetArgument(5, a_buffer()); + kernel.SetArgument(6, static_cast(a_offset)); + kernel.SetArgument(7, static_cast(a_ld)); + kernel.SetArgument(8, static_cast(is_upper)); + kernel.SetArgument(9, static_cast(is_rowmajor)); + + // Launches the kernel + auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); + auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); + auto global = std::vector{global_one, global_two}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, queue_, device_, global, local, event_); + if (ErrorIn(status)) { return status; } + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xher; +template class Xher; +template class Xher; +template class Xher; +template class Xher; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc deleted file mode 100644 index 50572cea..00000000 --- a/src/routines/level2/xher2.cc +++ /dev/null @@ -1,108 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xher2 class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xher2.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xher2::Xher2(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xger"}, PrecisionValue()) { - source_string_ = - #include "../../kernels/level2/level2.opencl" - #include "../../kernels/level2/xher2.opencl" - ; -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xher2::DoHer2(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const bool packed) { - - // Makes sure the dimensions are larger than zero - if (n == 0) { return StatusCode::kInvalidDimension; } - - // The data is either in the upper or lower triangle - const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - const auto is_rowmajor = (layout == Layout::kRowMajor); - - // Tests the matrix and the vectors for validity - auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } - if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } - - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - - // Retrieves the kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); - auto kernel = Kernel(program, "Xher2"); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast(n)); - kernel.SetArgument(1, alpha_buffer()); - kernel.SetArgument(2, x_buffer()); - kernel.SetArgument(3, static_cast(x_offset)); - kernel.SetArgument(4, static_cast(x_inc)); - kernel.SetArgument(5, y_buffer()); - kernel.SetArgument(6, static_cast(y_offset)); - kernel.SetArgument(7, static_cast(y_inc)); - kernel.SetArgument(8, a_buffer()); - kernel.SetArgument(9, static_cast(a_offset)); - kernel.SetArgument(10, static_cast(a_ld)); - kernel.SetArgument(11, static_cast(is_upper)); - kernel.SetArgument(12, static_cast(is_rowmajor)); - - // Launches the kernel - auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); - auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); - auto global = std::vector{global_one, global_two}; - auto local = std::vector{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xher2; -template class Xher2; -template class Xher2; -template class Xher2; -template class Xher2; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp new file mode 100644 index 00000000..50572cea --- /dev/null +++ b/src/routines/level2/xher2.cpp @@ -0,0 +1,108 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xher2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xher2.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xher2::Xher2(Queue &queue, EventPointer event, const std::string &name): + Routine(queue, event, name, {"Xger"}, PrecisionValue()) { + source_string_ = + #include "../../kernels/level2/level2.opencl" + #include "../../kernels/level2/xher2.opencl" + ; +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xher2::DoHer2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed) { + + // Makes sure the dimensions are larger than zero + if (n == 0) { return StatusCode::kInvalidDimension; } + + // The data is either in the upper or lower triangle + const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + const auto is_rowmajor = (layout == Layout::kRowMajor); + + // Tests the matrix and the vectors for validity + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } + if (ErrorIn(status)) { return status; } + status = TestVectorX(n, x_buffer, x_offset, x_inc); + if (ErrorIn(status)) { return status; } + status = TestVectorY(n, y_buffer, y_offset, y_inc); + if (ErrorIn(status)) { return status; } + + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary + try { + const auto program = GetProgramFromCache(context_, PrecisionValue(), routine_name_); + auto kernel = Kernel(program, "Xher2"); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast(n)); + kernel.SetArgument(1, alpha_buffer()); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, static_cast(x_offset)); + kernel.SetArgument(4, static_cast(x_inc)); + kernel.SetArgument(5, y_buffer()); + kernel.SetArgument(6, static_cast(y_offset)); + kernel.SetArgument(7, static_cast(y_inc)); + kernel.SetArgument(8, a_buffer()); + kernel.SetArgument(9, static_cast(a_offset)); + kernel.SetArgument(10, static_cast(a_ld)); + kernel.SetArgument(11, static_cast(is_upper)); + kernel.SetArgument(12, static_cast(is_rowmajor)); + + // Launches the kernel + auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]); + auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); + auto global = std::vector{global_one, global_two}; + auto local = std::vector{db_["WGS1"], db_["WGS2"]}; + status = RunKernel(kernel, queue_, device_, global, local, event_); + if (ErrorIn(status)) { return status; } + + // Succesfully finished the computation + return StatusCode::kSuccess; + } catch (...) { return StatusCode::kInvalidKernel; } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xher2; +template class Xher2; +template class Xher2; +template class Xher2; +template class Xher2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cc deleted file mode 100644 index e6f82b34..00000000 --- a/src/routines/level2/xhpmv.cc +++ /dev/null @@ -1,64 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xhpmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xhpmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xhpmv::Xhpmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xhpmv::DoHpmv(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &ap_buffer, const size_t ap_offset, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific hermitian packed matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_HPMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - ap_buffer, ap_offset, n, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, true, 0, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xhpmv; -template class Xhpmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xhpmv.cpp b/src/routines/level2/xhpmv.cpp new file mode 100644 index 00000000..e6f82b34 --- /dev/null +++ b/src/routines/level2/xhpmv.cpp @@ -0,0 +1,64 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xhpmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpmv::Xhpmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpmv::DoHpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &ap_buffer, const size_t ap_offset, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific hermitian packed matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_HPMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + ap_buffer, ap_offset, n, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, true, 0, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpmv; +template class Xhpmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cc deleted file mode 100644 index 225ebfe5..00000000 --- a/src/routines/level2/xhpr.cc +++ /dev/null @@ -1,51 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xhpr class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xhpr.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xhpr::Xhpr(Queue &queue, EventPointer event, const std::string &name): - Xher(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xhpr::DoHpr(const Layout layout, const Triangle triangle, - const size_t n, - const U alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &ap_buffer, const size_t ap_offset) { - - // Specific Xhpr functionality is implemented in the kernel using defines - return DoHer(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - ap_buffer, ap_offset, n, - true); // packed matrix -} - -// ================================================================================================= - -// Compiles the templated class -template class Xhpr; -template class Xhpr; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xhpr.cpp b/src/routines/level2/xhpr.cpp new file mode 100644 index 00000000..225ebfe5 --- /dev/null +++ b/src/routines/level2/xhpr.cpp @@ -0,0 +1,51 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xhpr.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpr::Xhpr(Queue &queue, EventPointer event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpr::DoHpr(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xhpr functionality is implemented in the kernel using defines + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpr; +template class Xhpr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xhpr2.cc b/src/routines/level2/xhpr2.cc deleted file mode 100644 index 85f9d3f9..00000000 --- a/src/routines/level2/xhpr2.cc +++ /dev/null @@ -1,53 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xhpr2 class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xhpr2.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xhpr2::Xhpr2(Queue &queue, EventPointer event, const std::string &name): - Xher2(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xhpr2::DoHpr2(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &ap_buffer, const size_t ap_offset) { - - // Specific Xhpr2 functionality is implemented in the kernel using defines - return DoHer2(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - y_buffer, y_offset, y_inc, - ap_buffer, ap_offset, n, - true); // packed matrix -} - -// ================================================================================================= - -// Compiles the templated class -template class Xhpr2; -template class Xhpr2; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xhpr2.cpp b/src/routines/level2/xhpr2.cpp new file mode 100644 index 00000000..85f9d3f9 --- /dev/null +++ b/src/routines/level2/xhpr2.cpp @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpr2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xhpr2.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpr2::Xhpr2(Queue &queue, EventPointer event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpr2::DoHpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xhpr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpr2; +template class Xhpr2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsbmv.cc b/src/routines/level2/xsbmv.cc deleted file mode 100644 index 28730899..00000000 --- a/src/routines/level2/xsbmv.cc +++ /dev/null @@ -1,65 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xsbmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xsbmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xsbmv::Xsbmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xsbmv::DoSbmv(const Layout layout, const Triangle triangle, - const size_t n, const size_t k, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific symmetric banded matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_SBMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, false, k, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xsbmv; -template class Xsbmv; -template class Xsbmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xsbmv.cpp b/src/routines/level2/xsbmv.cpp new file mode 100644 index 00000000..28730899 --- /dev/null +++ b/src/routines/level2/xsbmv.cpp @@ -0,0 +1,65 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsbmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xsbmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsbmv::Xsbmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsbmv::DoSbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific symmetric banded matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_SBMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, false, k, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsbmv; +template class Xsbmv; +template class Xsbmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xspmv.cc b/src/routines/level2/xspmv.cc deleted file mode 100644 index f6651012..00000000 --- a/src/routines/level2/xspmv.cc +++ /dev/null @@ -1,65 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xspmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xspmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xspmv::Xspmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xspmv::DoSpmv(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &ap_buffer, const size_t ap_offset, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific symmetric packed matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_SPMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - ap_buffer, ap_offset, n, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, true, 0, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xspmv; -template class Xspmv; -template class Xspmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xspmv.cpp b/src/routines/level2/xspmv.cpp new file mode 100644 index 00000000..f6651012 --- /dev/null +++ b/src/routines/level2/xspmv.cpp @@ -0,0 +1,65 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xspmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xspmv::Xspmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xspmv::DoSpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &ap_buffer, const size_t ap_offset, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific symmetric packed matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_SPMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + ap_buffer, ap_offset, n, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, true, 0, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xspmv; +template class Xspmv; +template class Xspmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc deleted file mode 100644 index a75fe9c3..00000000 --- a/src/routines/level2/xspr.cc +++ /dev/null @@ -1,52 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xspr class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xspr.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xspr::Xspr(Queue &queue, EventPointer event, const std::string &name): - Xher(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &ap_buffer, const size_t ap_offset) { - - // Specific Xspr functionality is implemented in the kernel using defines - return DoHer(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - ap_buffer, ap_offset, n, - true); // packed matrix -} - -// ================================================================================================= - -// Compiles the templated class -template class Xspr; -template class Xspr; -template class Xspr; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xspr.cpp b/src/routines/level2/xspr.cpp new file mode 100644 index 00000000..a75fe9c3 --- /dev/null +++ b/src/routines/level2/xspr.cpp @@ -0,0 +1,52 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xspr.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xspr::Xspr(Queue &queue, EventPointer event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xspr::DoSpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xspr functionality is implemented in the kernel using defines + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// Compiles the templated class +template class Xspr; +template class Xspr; +template class Xspr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cc deleted file mode 100644 index c39a2eb4..00000000 --- a/src/routines/level2/xspr2.cc +++ /dev/null @@ -1,54 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xspr2 class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xspr2.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xspr2::Xspr2(Queue &queue, EventPointer event, const std::string &name): - Xher2(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xspr2::DoSpr2(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &ap_buffer, const size_t ap_offset) { - - // Specific Xspr2 functionality is implemented in the kernel using defines - return DoHer2(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - y_buffer, y_offset, y_inc, - ap_buffer, ap_offset, n, - true); // packed matrix -} - -// ================================================================================================= - -// Compiles the templated class -template class Xspr2; -template class Xspr2; -template class Xspr2; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xspr2.cpp b/src/routines/level2/xspr2.cpp new file mode 100644 index 00000000..c39a2eb4 --- /dev/null +++ b/src/routines/level2/xspr2.cpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xspr2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xspr2.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xspr2::Xspr2(Queue &queue, EventPointer event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xspr2::DoSpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &ap_buffer, const size_t ap_offset) { + + // Specific Xspr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + ap_buffer, ap_offset, n, + true); // packed matrix +} + +// ================================================================================================= + +// Compiles the templated class +template class Xspr2; +template class Xspr2; +template class Xspr2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cc deleted file mode 100644 index 648d2a3e..00000000 --- a/src/routines/level2/xsymv.cc +++ /dev/null @@ -1,65 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xsymv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xsymv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xsymv::Xsymv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xsymv::DoSymv(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific symmetric matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_SYMV define. - bool fast_kernels = false; - return MatVec(layout, Transpose::kNo, - n, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - fast_kernels, fast_kernels, - is_upper, false, 0, 0); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xsymv; -template class Xsymv; -template class Xsymv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xsymv.cpp b/src/routines/level2/xsymv.cpp new file mode 100644 index 00000000..648d2a3e --- /dev/null +++ b/src/routines/level2/xsymv.cpp @@ -0,0 +1,65 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsymv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xsymv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsymv::Xsymv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsymv::DoSymv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific symmetric matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_SYMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, false, 0, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsymv; +template class Xsymv; +template class Xsymv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc deleted file mode 100644 index 758d8f8f..00000000 --- a/src/routines/level2/xsyr.cc +++ /dev/null @@ -1,51 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xsyr class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xsyr.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xsyr::Xsyr(Queue &queue, EventPointer event, const std::string &name): - Xher(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xsyr::DoSyr(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - - // Specific Xsyr functionality is implemented in the kernel using defines - return DoHer(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - a_buffer, a_offset, a_ld); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xsyr; -template class Xsyr; -template class Xsyr; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xsyr.cpp b/src/routines/level2/xsyr.cpp new file mode 100644 index 00000000..758d8f8f --- /dev/null +++ b/src/routines/level2/xsyr.cpp @@ -0,0 +1,51 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xsyr.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsyr::Xsyr(Queue &queue, EventPointer event, const std::string &name): + Xher(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsyr::DoSyr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Specific Xsyr functionality is implemented in the kernel using defines + return DoHer(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsyr; +template class Xsyr; +template class Xsyr; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cc deleted file mode 100644 index 6f43b219..00000000 --- a/src/routines/level2/xsyr2.cc +++ /dev/null @@ -1,53 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xsyr2 class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xsyr2.hpp" - -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xsyr2::Xsyr2(Queue &queue, EventPointer event, const std::string &name): - Xher2(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xsyr2::DoSyr2(const Layout layout, const Triangle triangle, - const size_t n, - const T alpha, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { - - // Specific Xsyr2 functionality is implemented in the kernel using defines - return DoHer2(layout, triangle, n, alpha, - x_buffer, x_offset, x_inc, - y_buffer, y_offset, y_inc, - a_buffer, a_offset, a_ld); -} - -// ================================================================================================= - -// Compiles the templated class -template class Xsyr2; -template class Xsyr2; -template class Xsyr2; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xsyr2.cpp b/src/routines/level2/xsyr2.cpp new file mode 100644 index 00000000..6f43b219 --- /dev/null +++ b/src/routines/level2/xsyr2.cpp @@ -0,0 +1,53 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xsyr2 class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xsyr2.hpp" + +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xsyr2::Xsyr2(Queue &queue, EventPointer event, const std::string &name): + Xher2(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xsyr2::DoSyr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld) { + + // Specific Xsyr2 functionality is implemented in the kernel using defines + return DoHer2(layout, triangle, n, alpha, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + a_buffer, a_offset, a_ld); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xsyr2; +template class Xsyr2; +template class Xsyr2; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xtbmv.cc b/src/routines/level2/xtbmv.cc deleted file mode 100644 index e315c544..00000000 --- a/src/routines/level2/xtbmv.cc +++ /dev/null @@ -1,82 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xtbmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xtbmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xtbmv::Xtbmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xtbmv::DoTbmv(const Layout layout, const Triangle triangle, - const Transpose a_transpose, const Diagonal diagonal, - const size_t n, const size_t k, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { - - // Creates a copy of X: a temporary scratch buffer - auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); - try { - x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); - } catch (...) { } // Continues: error-code is returned in MatVec - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Adds '2' to the parameter if the diagonal is unit - auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific triangular banded matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_TBMV define. - auto fast_kernels = false; - auto status = MatVec(layout, a_transpose, - n, n, static_cast(1), - a_buffer, a_offset, a_ld, - scratch_buffer, x_offset, x_inc, static_cast(0), - x_buffer, x_offset, x_inc, - fast_kernels, fast_kernels, - parameter, false, k, 0); - - // Returns the proper error code (renames vector Y to X) - switch(status) { - case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; - case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; - case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; - default: return status; - } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xtbmv; -template class Xtbmv; -template class Xtbmv; -template class Xtbmv; -template class Xtbmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp new file mode 100644 index 00000000..e315c544 --- /dev/null +++ b/src/routines/level2/xtbmv.cpp @@ -0,0 +1,82 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xtbmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xtbmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xtbmv::Xtbmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xtbmv::DoTbmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, const size_t k, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { + + // Creates a copy of X: a temporary scratch buffer + auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); + try { + x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); + } catch (...) { } // Continues: error-code is returned in MatVec + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Adds '2' to the parameter if the diagonal is unit + auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific triangular banded matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_TBMV define. + auto fast_kernels = false; + auto status = MatVec(layout, a_transpose, + n, n, static_cast(1), + a_buffer, a_offset, a_ld, + scratch_buffer, x_offset, x_inc, static_cast(0), + x_buffer, x_offset, x_inc, + fast_kernels, fast_kernels, + parameter, false, k, 0); + + // Returns the proper error code (renames vector Y to X) + switch(status) { + case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; + case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; + case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; + default: return status; + } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xtbmv; +template class Xtbmv; +template class Xtbmv; +template class Xtbmv; +template class Xtbmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xtpmv.cc b/src/routines/level2/xtpmv.cc deleted file mode 100644 index 46811089..00000000 --- a/src/routines/level2/xtpmv.cc +++ /dev/null @@ -1,82 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xtpmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xtpmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xtpmv::Xtpmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xtpmv::DoTpmv(const Layout layout, const Triangle triangle, - const Transpose a_transpose, const Diagonal diagonal, - const size_t n, - const Buffer &ap_buffer, const size_t ap_offset, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { - - // Creates a copy of X: a temporary scratch buffer - auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); - try { - x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); - } catch (...) { } // Continues: error-code is returned in MatVec - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Adds '2' to the parameter if the diagonal is unit - auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific triangular packed matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_TPMV define. - auto fast_kernels = false; - auto status = MatVec(layout, a_transpose, - n, n, static_cast(1), - ap_buffer, ap_offset, n, - scratch_buffer, x_offset, x_inc, static_cast(0), - x_buffer, x_offset, x_inc, - fast_kernels, fast_kernels, - parameter, true, 0, 0); - - // Returns the proper error code (renames vector Y to X) - switch(status) { - case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; - case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; - case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; - default: return status; - } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xtpmv; -template class Xtpmv; -template class Xtpmv; -template class Xtpmv; -template class Xtpmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xtpmv.cpp b/src/routines/level2/xtpmv.cpp new file mode 100644 index 00000000..46811089 --- /dev/null +++ b/src/routines/level2/xtpmv.cpp @@ -0,0 +1,82 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xtpmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xtpmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xtpmv::Xtpmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xtpmv::DoTpmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer &ap_buffer, const size_t ap_offset, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { + + // Creates a copy of X: a temporary scratch buffer + auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); + try { + x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); + } catch (...) { } // Continues: error-code is returned in MatVec + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Adds '2' to the parameter if the diagonal is unit + auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific triangular packed matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_TPMV define. + auto fast_kernels = false; + auto status = MatVec(layout, a_transpose, + n, n, static_cast(1), + ap_buffer, ap_offset, n, + scratch_buffer, x_offset, x_inc, static_cast(0), + x_buffer, x_offset, x_inc, + fast_kernels, fast_kernels, + parameter, true, 0, 0); + + // Returns the proper error code (renames vector Y to X) + switch(status) { + case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; + case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; + case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; + default: return status; + } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xtpmv; +template class Xtpmv; +template class Xtpmv; +template class Xtpmv; +template class Xtpmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xtrmv.cc b/src/routines/level2/xtrmv.cc deleted file mode 100644 index d2f24252..00000000 --- a/src/routines/level2/xtrmv.cc +++ /dev/null @@ -1,82 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file implements the Xtrmv class (see the header for information about the class). -// -// ================================================================================================= - -#include "routines/level2/xtrmv.hpp" - -#include -#include - -namespace clblast { -// ================================================================================================= - -// Constructor: forwards to base class constructor -template -Xtrmv::Xtrmv(Queue &queue, EventPointer event, const std::string &name): - Xgemv(queue, event, name) { -} - -// ================================================================================================= - -// The main routine -template -StatusCode Xtrmv::DoTrmv(const Layout layout, const Triangle triangle, - const Transpose a_transpose, const Diagonal diagonal, - const size_t n, - const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { - - // Creates a copy of X: a temporary scratch buffer - auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); - try { - x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); - } catch (...) { } // Continues: error-code is returned in MatVec - - // The data is either in the upper or lower triangle - size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || - (triangle == Triangle::kLower && layout == Layout::kRowMajor)); - - // Adds '2' to the parameter if the diagonal is unit - auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; - - // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. - // The specific triangular matrix-accesses are implemented in the kernel guarded by the - // ROUTINE_TRMV define. - auto fast_kernels = false; - auto status = MatVec(layout, a_transpose, - n, n, static_cast(1), - a_buffer, a_offset, a_ld, - scratch_buffer, x_offset, x_inc, static_cast(0), - x_buffer, x_offset, x_inc, - fast_kernels, fast_kernels, - parameter, false, 0, 0); - - // Returns the proper error code (renames vector Y to X) - switch(status) { - case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; - case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; - case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; - default: return status; - } -} - -// ================================================================================================= - -// Compiles the templated class -template class Xtrmv; -template class Xtrmv; -template class Xtrmv; -template class Xtrmv; -template class Xtrmv; - -// ================================================================================================= -} // namespace clblast diff --git a/src/routines/level2/xtrmv.cpp b/src/routines/level2/xtrmv.cpp new file mode 100644 index 00000000..d2f24252 --- /dev/null +++ b/src/routines/level2/xtrmv.cpp @@ -0,0 +1,82 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xtrmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xtrmv.hpp" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xtrmv::Xtrmv(Queue &queue, EventPointer event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xtrmv::DoTrmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc) { + + // Creates a copy of X: a temporary scratch buffer + auto scratch_buffer = Buffer(context_, n*x_inc + x_offset); + try { + x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); + } catch (...) { } // Continues: error-code is returned in MatVec + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Adds '2' to the parameter if the diagonal is unit + auto parameter = (diagonal == Diagonal::kUnit) ? is_upper + 2 : is_upper; + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific triangular matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_TRMV define. + auto fast_kernels = false; + auto status = MatVec(layout, a_transpose, + n, n, static_cast(1), + a_buffer, a_offset, a_ld, + scratch_buffer, x_offset, x_inc, static_cast(0), + x_buffer, x_offset, x_inc, + fast_kernels, fast_kernels, + parameter, false, 0, 0); + + // Returns the proper error code (renames vector Y to X) + switch(status) { + case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX; + case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX; + case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX; + default: return status; + } +} + +// ================================================================================================= + +// Compiles the templated class +template class Xtrmv; +template class Xtrmv; +template class Xtrmv; +template class Xtrmv; +template class Xtrmv; + +// ================================================================================================= +} // namespace clblast -- cgit v1.2.3