diff options
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- | src/routines/level2/xgbmv.cpp (renamed from src/routines/level2/xgbmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xgbmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xgemv.cpp (renamed from src/routines/level2/xgemv.cc) | 35 | ||||
-rw-r--r-- | src/routines/level2/xgemv.hpp | 56 | ||||
-rw-r--r-- | src/routines/level2/xger.cpp (renamed from src/routines/level2/xger.cc) | 31 | ||||
-rw-r--r-- | src/routines/level2/xger.hpp | 43 | ||||
-rw-r--r-- | src/routines/level2/xgerc.cpp (renamed from src/routines/level2/xgerc.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xgerc.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xgeru.cpp (renamed from src/routines/level2/xgeru.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xgeru.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xhbmv.cpp (renamed from src/routines/level2/xhbmv.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xhbmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xhemv.cpp (renamed from src/routines/level2/xhemv.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xhemv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xher.cpp (renamed from src/routines/level2/xher.cc) | 38 | ||||
-rw-r--r-- | src/routines/level2/xher.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xher2.cpp (renamed from src/routines/level2/xher2.cc) | 33 | ||||
-rw-r--r-- | src/routines/level2/xher2.hpp | 44 | ||||
-rw-r--r-- | src/routines/level2/xhpmv.cpp (renamed from src/routines/level2/xhpmv.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xhpmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xhpr.cpp (renamed from src/routines/level2/xhpr.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xhpr.hpp | 45 | ||||
-rw-r--r-- | src/routines/level2/xhpr2.cpp (renamed from src/routines/level2/xhpr2.cc) | 2 | ||||
-rw-r--r-- | src/routines/level2/xhpr2.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xsbmv.cpp (renamed from src/routines/level2/xsbmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xsbmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xspmv.cpp (renamed from src/routines/level2/xspmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xspmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xspr.cpp (renamed from src/routines/level2/xspr.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xspr.hpp | 45 | ||||
-rw-r--r-- | src/routines/level2/xspr2.cpp (renamed from src/routines/level2/xspr2.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xspr2.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xsymv.cpp (renamed from src/routines/level2/xsymv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xsymv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xsyr.cpp (renamed from src/routines/level2/xsyr.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xsyr.hpp | 45 | ||||
-rw-r--r-- | src/routines/level2/xsyr2.cpp (renamed from src/routines/level2/xsyr2.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xsyr2.hpp | 46 | ||||
-rw-r--r-- | src/routines/level2/xtbmv.cpp (renamed from src/routines/level2/xtbmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xtbmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xtpmv.cpp (renamed from src/routines/level2/xtpmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xtpmv.hpp | 49 | ||||
-rw-r--r-- | src/routines/level2/xtrmv.cpp (renamed from src/routines/level2/xtrmv.cc) | 3 | ||||
-rw-r--r-- | src/routines/level2/xtrmv.hpp | 49 |
44 files changed, 1137 insertions, 91 deletions
diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cpp index f90e26b2..ea4f001c 100644 --- a/src/routines/level2/xgbmv.cc +++ b/src/routines/level2/xgbmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgbmv.h" +#include "routines/level2/xgbmv.hpp" #include <string> #include <vector> @@ -58,6 +58,7 @@ StatusCode Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose, // ================================================================================================= // Compiles the templated class +template class Xgbmv<half>; template class Xgbmv<float>; template class Xgbmv<double>; template class Xgbmv<float2>; diff --git a/src/routines/level2/xgbmv.hpp b/src/routines/level2/xgbmv.hpp new file mode 100644 index 00000000..686ab642 --- /dev/null +++ b/src/routines/level2/xgbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xgbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGBMV_H_ +#define CLBLAST_ROUTINES_XGBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV"); + + // Templated-precision implementation of the routine + StatusCode DoGbmv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGBMV_H_ +#endif diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cpp index f8985038..21fb397c 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgemv.h" +#include "routines/level2/xgemv.hpp" #include <string> #include <vector> @@ -19,18 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xgemv<float>::precision_ = Precision::kSingle; -template <> const 
Precision Xgemv<double>::precision_ = Precision::kDouble; -template <> const Precision Xgemv<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Pad", "Xgemv"}, precision_) { + Routine(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" @@ -100,12 +92,12 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n_real, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n_real, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(m_real, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(m_real, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used @@ -134,16 +126,22 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, local_size = db_["WGS3"]; } + // Upload the scalar arguments as constant buffers to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + auto beta_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, 
&alpha); + beta_buffer.Write(queue_, 1, &beta); + // Retrieves the Xgemv kernel from the compiled binary try { - const auto program = GetProgramFromCache(); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(m_real)); kernel.SetArgument(1, static_cast<int>(n_real)); - kernel.SetArgument(2, alpha); - kernel.SetArgument(3, beta); + kernel.SetArgument(2, alpha_buffer()); + kernel.SetArgument(3, beta_buffer()); kernel.SetArgument(4, static_cast<int>(a_rotated)); kernel.SetArgument(5, a_buffer()); kernel.SetArgument(6, static_cast<int>(a_offset)); @@ -162,7 +160,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // Launches the kernel auto global = std::vector<size_t>{global_size}; auto local = std::vector<size_t>{local_size}; - status = RunKernel(kernel, global, local, event_); + status = RunKernel(kernel, queue_, device_, global, local, event_); if (ErrorIn(status)) { return status; } // Succesfully finished the computation @@ -173,6 +171,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // ================================================================================================= // Compiles the templated class +template class Xgemv<half>; template class Xgemv<float>; template class Xgemv<double>; template class Xgemv<float2>; diff --git a/src/routines/level2/xgemv.hpp b/src/routines/level2/xgemv.hpp new file mode 100644 index 00000000..e9afec8d --- /dev/null +++ b/src/routines/level2/xgemv.hpp @@ -0,0 +1,56 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgemv routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGEMV_H_ +#define CLBLAST_ROUTINES_XGEMV_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgemv: public Routine { + public: + + // Constructor + Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV"); + + // Templated-precision implementation of the routine + StatusCode DoGemv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); + + // Generic version used also for other matrix-vector multiplications + StatusCode MatVec(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + bool fast_kernel, bool fast_kernel_rot, + const size_t parameter, const bool packed, + const size_t kl, const size_t ku); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGEMV_H_ +#endif diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cpp index 686c7e60..353047d2 100644 --- a/src/routines/level2/xger.cc +++ 
b/src/routines/level2/xger.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xger.h" +#include "routines/level2/xger.hpp" #include <string> #include <vector> @@ -19,18 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xger<float>::precision_ = Precision::kSingle; -template <> const Precision Xger<double>::precision_ = Precision::kDouble; -template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xger<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xger.opencl" @@ -57,22 +49,26 @@ StatusCode Xger<T>::DoGer(const Layout layout, const auto a_two = (a_is_rowmajor) ? 
m : n; // Tests the matrix and the vectors for validity - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestVectorX(m, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(m, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } - // Retrieves the Xgemv kernel from the compiled binary + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xger"); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(a_one)); kernel.SetArgument(1, static_cast<int>(a_two)); - kernel.SetArgument(2, alpha); + kernel.SetArgument(2, alpha_buffer()); kernel.SetArgument(3, x_buffer()); kernel.SetArgument(4, static_cast<int>(x_offset)); kernel.SetArgument(5, static_cast<int>(x_inc)); @@ -89,7 +85,7 @@ StatusCode Xger<T>::DoGer(const Layout layout, auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]); auto global = std::vector<size_t>{a_one_ceiled, a_two_ceiled}; auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, global, local, event_); + status = RunKernel(kernel, queue_, device_, global, local, event_); if (ErrorIn(status)) { return status; } // Succesfully finished the computation @@ -100,6 +96,7 @@ StatusCode Xger<T>::DoGer(const Layout layout, // ================================================================================================= // 
Compiles the templated class +template class Xger<half>; template class Xger<float>; template class Xger<double>; template class Xger<float2>; diff --git a/src/routines/level2/xger.hpp b/src/routines/level2/xger.hpp new file mode 100644 index 00000000..3c6abe44 --- /dev/null +++ b/src/routines/level2/xger.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xger routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGER_H_ +#define CLBLAST_ROUTINES_XGER_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xger: public Routine { + public: + + // Constructor + Xger(Queue &queue, EventPointer event, const std::string &name = "GER"); + + // Templated-precision implementation of the routine + StatusCode DoGer(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGER_H_ +#endif diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cpp index 
73284b52..d9feda97 100644 --- a/src/routines/level2/xgerc.cc +++ b/src/routines/level2/xgerc.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgerc.h" +#include "routines/level2/xgerc.hpp" #include <string> diff --git a/src/routines/level2/xgerc.hpp b/src/routines/level2/xgerc.hpp new file mode 100644 index 00000000..f1d04dfd --- /dev/null +++ b/src/routines/level2/xgerc.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgerc routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERC_H_ +#define CLBLAST_ROUTINES_XGERC_H_ + +#include "routines/level2/xger.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgerc: public Xger<T> { + public: + + // Uses the regular Xger routine + using Xger<T>::DoGer; + + // Constructor + Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC"); + + // Templated-precision implementation of the routine + StatusCode DoGerc(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERC_H_ +#endif diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cpp index 7730d6a5..da9e91c2 100644 --- a/src/routines/level2/xgeru.cc +++ b/src/routines/level2/xgeru.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xgeru.h" +#include "routines/level2/xgeru.hpp" #include <string> diff --git a/src/routines/level2/xgeru.hpp b/src/routines/level2/xgeru.hpp new file mode 100644 index 00000000..fb50e917 --- /dev/null +++ b/src/routines/level2/xgeru.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgeru routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XGERU_H_ +#define CLBLAST_ROUTINES_XGERU_H_ + +#include "routines/level2/xger.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xgeru: public Xger<T> { + public: + + // Uses the regular Xger routine + using Xger<T>::DoGer; + + // Constructor + Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU"); + + // Templated-precision implementation of the routine + StatusCode DoGeru(const Layout layout, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XGERU_H_ +#endif diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cpp index 58591b50..f6c0e3c4 100644 --- a/src/routines/level2/xhbmv.cc +++ b/src/routines/level2/xhbmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhbmv.h" +#include "routines/level2/xhbmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhbmv.hpp b/src/routines/level2/xhbmv.hpp new file mode 100644 index 
00000000..d668eb88 --- /dev/null +++ b/src/routines/level2/xhbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHBMV_H_ +#define CLBLAST_ROUTINES_XHBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV"); + + // Templated-precision implementation of the routine + StatusCode DoHbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHBMV_H_ +#endif diff --git a/src/routines/level2/xhemv.cc 
b/src/routines/level2/xhemv.cpp index b4ef0fa4..2cbcf7b4 100644 --- a/src/routines/level2/xhemv.cc +++ b/src/routines/level2/xhemv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhemv.h" +#include "routines/level2/xhemv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhemv.hpp b/src/routines/level2/xhemv.hpp new file mode 100644 index 00000000..8e062fd3 --- /dev/null +++ b/src/routines/level2/xhemv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhemv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhemv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHEMV_H_ +#define CLBLAST_ROUTINES_XHEMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhemv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV"); + + // Templated-precision implementation of the routine + StatusCode DoHemv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHEMV_H_ +#endif diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cpp index a7116213..ed8ba9e9 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cpp @@ -11,25 +11,17 @@ // // ================================================================================================= -#include "internal/routines/level2/xher.h" +#include "routines/level2/xher.hpp" #include <string> namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher<float, float>::precision_ = Precision::kSingle; -template <> const Precision Xher<double, double>::precision_ = Precision::kDouble; -template <> const Precision 
Xher<float2, float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher<double2, double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher.opencl" @@ -43,6 +35,7 @@ template <> float2 Xher<float2,float>::GetAlpha(const float alpha) { return floa template <> double2 Xher<double2,double>::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } template <> float Xher<float,float>::GetAlpha(const float alpha) { return alpha; } template <> double Xher<double,double>::GetAlpha(const double alpha) { return alpha; } +template <> half Xher<half,half>::GetAlpha(const half alpha) { return alpha; } // ================================================================================================= @@ -63,28 +56,32 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, (triangle == Triangle::kLower && layout == Layout::kRowMajor)); const auto is_rowmajor = (layout == Layout::kRowMajor); - // Creates a matching version of alpha - const auto matching_alpha = GetAlpha(alpha); - // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n, 
x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } // If alpha is zero an update is not required if (alpha == U{0}) { return StatusCode::kSuccess; } - // Retrieves the Xgemv kernel from the compiled binary + // Creates a matching version of alpha + const auto matching_alpha = GetAlpha(alpha); + + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &matching_alpha); + + // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xher"); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, matching_alpha); + kernel.SetArgument(1, alpha_buffer()); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); @@ -99,7 +96,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); auto global = std::vector<size_t>{global_one, global_two}; auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, global, local, event_); + status = RunKernel(kernel, queue_, device_, global, local, event_); if (ErrorIn(status)) { return status; } // Succesfully finished the computation @@ -110,6 +107,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xher<half, half>; template class Xher<float, float>; template class Xher<double, double>; template class Xher<float2, float>; diff --git a/src/routines/level2/xher.hpp b/src/routines/level2/xher.hpp new file mode 100644 index 00000000..9ff6bf3f --- 
/dev/null +++ b/src/routines/level2/xher.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xher routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER_H_ +#define CLBLAST_ROUTINES_XHER_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xher: public Routine { + public: + + // Constructor + Xher(Queue &queue, EventPointer event, const std::string &name = "HER"); + + // Translates alpha of type 'U' into type 'T' + T GetAlpha(const U alpha); + + // Templated-precision implementation of the routine + StatusCode DoHer(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed = false); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER_H_ +#endif diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cpp index 3fd1a961..50572cea 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cpp @@ -11,25 +11,17 @@ // // 
================================================================================================= -#include "internal/routines/level2/xher2.h" +#include "routines/level2/xher2.hpp" #include <string> namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher2<float>::precision_ = Precision::kSingle; -template <> const Precision Xher2<double>::precision_ = Precision::kDouble; -template <> const Precision Xher2<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher2<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher2.opencl" @@ -58,22 +50,26 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if 
(ErrorIn(status)) { return status; } - // Retrieves the Xgemv kernel from the compiled binary + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xher2"); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, alpha); + kernel.SetArgument(1, alpha_buffer()); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); @@ -91,7 +87,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]); auto global = std::vector<size_t>{global_one, global_two}; auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]}; - status = RunKernel(kernel, global, local, event_); + status = RunKernel(kernel, queue_, device_, global, local, event_); if (ErrorIn(status)) { return status; } // Succesfully finished the computation @@ -102,6 +98,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xher2<half>; template class Xher2<float>; template class Xher2<double>; template class Xher2<float2>; diff --git a/src/routines/level2/xher2.hpp b/src/routines/level2/xher2.hpp new file mode 100644 index 00000000..8c53c047 --- /dev/null +++ b/src/routines/level2/xher2.hpp @@ -0,0 +1,44 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xher2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHER2_H_ +#define CLBLAST_ROUTINES_XHER2_H_ + +#include "routine.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xher2: public Routine { + public: + + // Constructor + Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2"); + + // Templated-precision implementation of the routine + StatusCode DoHer2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const bool packed = false); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHER2_H_ +#endif diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cpp index 92686dbe..e6f82b34 100644 --- a/src/routines/level2/xhpmv.cc +++ b/src/routines/level2/xhpmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpmv.h" +#include "routines/level2/xhpmv.hpp" #include <string> #include <vector> diff --git a/src/routines/level2/xhpmv.hpp b/src/routines/level2/xhpmv.hpp new file mode 100644 index 00000000..b11192f9 --- /dev/null +++ 
b/src/routines/level2/xhpmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xhpmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPMV_H_ +#define CLBLAST_ROUTINES_XHPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhpmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV"); + + // Templated-precision implementation of the routine + StatusCode DoHpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPMV_H_ +#endif diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cpp index 4b31ad09..225ebfe5 100644 --- 
a/src/routines/level2/xhpr.cc +++ b/src/routines/level2/xhpr.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpr.h" +#include "routines/level2/xhpr.hpp" #include <string> diff --git a/src/routines/level2/xhpr.hpp b/src/routines/level2/xhpr.hpp new file mode 100644 index 00000000..37801c68 --- /dev/null +++ b/src/routines/level2/xhpr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpr routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR_H_ +#define CLBLAST_ROUTINES_XHPR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T, typename U> +class Xhpr: public Xher<T,U> { + public: + + // Uses the regular Xher routine + using Xher<T,U>::DoHer; + + // Constructor + Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR"); + + // Templated-precision implementation of the routine + StatusCode DoHpr(const Layout layout, const Triangle triangle, + const size_t n, + const U alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// 
================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR_H_ +#endif diff --git a/src/routines/level2/xhpr2.cc b/src/routines/level2/xhpr2.cpp index 9be24f43..85f9d3f9 100644 --- a/src/routines/level2/xhpr2.cc +++ b/src/routines/level2/xhpr2.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xhpr2.h" +#include "routines/level2/xhpr2.hpp" #include <string> diff --git a/src/routines/level2/xhpr2.hpp b/src/routines/level2/xhpr2.hpp new file mode 100644 index 00000000..d66dce55 --- /dev/null +++ b/src/routines/level2/xhpr2.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xhpr2 routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XHPR2_H_ +#define CLBLAST_ROUTINES_XHPR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xhpr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2"); + + // Templated-precision implementation of the routine + StatusCode DoHpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XHPR2_H_ +#endif diff --git a/src/routines/level2/xsbmv.cc b/src/routines/level2/xsbmv.cpp index bc82c88d..28730899 100644 --- a/src/routines/level2/xsbmv.cc +++ b/src/routines/level2/xsbmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsbmv.h" +#include "routines/level2/xsbmv.hpp" #include <string> #include <vector> @@ -57,6 +57,7 @@ StatusCode Xsbmv<T>::DoSbmv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsbmv<half>; template class Xsbmv<float>; template class Xsbmv<double>; diff --git a/src/routines/level2/xsbmv.hpp b/src/routines/level2/xsbmv.hpp new file mode 100644 index 
00000000..16c5e9a8 --- /dev/null +++ b/src/routines/level2/xsbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xsbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSBMV_H_ +#define CLBLAST_ROUTINES_XSBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV"); + + // Templated-precision implementation of the routine + StatusCode DoSbmv(const Layout layout, const Triangle triangle, + const size_t n, const size_t k, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSBMV_H_ +#endif diff --git a/src/routines/level2/xspmv.cc 
b/src/routines/level2/xspmv.cpp index 6e00dcfa..f6651012 100644 --- a/src/routines/level2/xspmv.cc +++ b/src/routines/level2/xspmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xspmv.h" +#include "routines/level2/xspmv.hpp" #include <string> #include <vector> @@ -57,6 +57,7 @@ StatusCode Xspmv<T>::DoSpmv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xspmv<half>; template class Xspmv<float>; template class Xspmv<double>; diff --git a/src/routines/level2/xspmv.hpp b/src/routines/level2/xspmv.hpp new file mode 100644 index 00000000..a0c69b85 --- /dev/null +++ b/src/routines/level2/xspmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xspmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPMV_H_ +#define CLBLAST_ROUTINES_XSPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV"); + + // Templated-precision implementation of the routine + StatusCode DoSpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPMV_H_ +#endif diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cpp index 55af2f29..a75fe9c3 100644 --- a/src/routines/level2/xspr.cc +++ b/src/routines/level2/xspr.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xspr.h" +#include "routines/level2/xspr.hpp" #include <string> @@ -44,6 +44,7 @@ StatusCode Xspr<T>::DoSpr(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xspr<half>; template class Xspr<float>; template class Xspr<double>; diff --git a/src/routines/level2/xspr.hpp b/src/routines/level2/xspr.hpp new file mode 100644 index 
00000000..6468c736 --- /dev/null +++ b/src/routines/level2/xspr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspr routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR_H_ +#define CLBLAST_ROUTINES_XSPR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspr: public Xher<T,T> { + public: + + // Uses the regular Xher routine + using Xher<T,T>::DoHer; + + // Constructor + Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR"); + + // Templated-precision implementation of the routine + StatusCode DoSpr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR_H_ +#endif diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cpp index 9a3f97ce..c39a2eb4 100644 --- a/src/routines/level2/xspr2.cc +++ b/src/routines/level2/xspr2.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include 
"internal/routines/level2/xspr2.h" +#include "routines/level2/xspr2.hpp" #include <string> @@ -46,6 +46,7 @@ StatusCode Xspr2<T>::DoSpr2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xspr2<half>; template class Xspr2<float>; template class Xspr2<double>; diff --git a/src/routines/level2/xspr2.hpp b/src/routines/level2/xspr2.hpp new file mode 100644 index 00000000..693c56a1 --- /dev/null +++ b/src/routines/level2/xspr2.hpp @@ -0,0 +1,46 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xspr2 routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSPR2_H_ +#define CLBLAST_ROUTINES_XSPR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xspr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2"); + + // Templated-precision implementation of the routine + StatusCode DoSpr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &ap_buffer, const size_t ap_offset); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSPR2_H_ +#endif diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cpp index a9eb284f..648d2a3e 100644 --- a/src/routines/level2/xsymv.cc +++ b/src/routines/level2/xsymv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsymv.h" +#include "routines/level2/xsymv.hpp" #include <string> #include <vector> @@ -57,6 +57,7 @@ StatusCode Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsymv<half>; template class Xsymv<float>; template class Xsymv<double>; diff --git a/src/routines/level2/xsymv.hpp b/src/routines/level2/xsymv.hpp new file mode 100644 index 
00000000..67815f2f --- /dev/null +++ b/src/routines/level2/xsymv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsymv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xsymv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYMV_H_ +#define CLBLAST_ROUTINES_XSYMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsymv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::MatVec; + + // Constructor + Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV"); + + // Templated-precision implementation of the routine + StatusCode DoSymv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYMV_H_ +#endif diff --git a/src/routines/level2/xsyr.cc 
b/src/routines/level2/xsyr.cpp index 4b3928e5..758d8f8f 100644 --- a/src/routines/level2/xsyr.cc +++ b/src/routines/level2/xsyr.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsyr.h" +#include "routines/level2/xsyr.hpp" #include <string> @@ -43,6 +43,7 @@ StatusCode Xsyr<T>::DoSyr(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsyr<half>; template class Xsyr<float>; template class Xsyr<double>; diff --git a/src/routines/level2/xsyr.hpp b/src/routines/level2/xsyr.hpp new file mode 100644 index 00000000..20393454 --- /dev/null +++ b/src/routines/level2/xsyr.hpp @@ -0,0 +1,45 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyr routine. The precision is implemented using a template argument. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR_H_ +#define CLBLAST_ROUTINES_XSYR_H_ + +#include "routines/level2/xher.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyr: public Xher<T,T> { + public: + + // Uses the regular Xher routine + using Xher<T,T>::DoHer; + + // Constructor + Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR"); + + // Templated-precision implementation of the routine + StatusCode DoSyr(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR_H_ +#endif diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cpp index 3ae389e0..6f43b219 100644 --- a/src/routines/level2/xsyr2.cc +++ b/src/routines/level2/xsyr2.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xsyr2.h" +#include "routines/level2/xsyr2.hpp" #include <string> @@ -45,6 +45,7 @@ StatusCode Xsyr2<T>::DoSyr2(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xsyr2<half>; template class Xsyr2<float>; template class Xsyr2<double>; diff --git a/src/routines/level2/xsyr2.hpp b/src/routines/level2/xsyr2.hpp new file mode 100644 index 00000000..1a8dcbe8 --- /dev/null +++ b/src/routines/level2/xsyr2.hpp @@ -0,0 +1,46 @@ 
+ +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyr2 routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XSYR2_H_ +#define CLBLAST_ROUTINES_XSYR2_H_ + +#include "routines/level2/xher2.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xsyr2: public Xher2<T> { + public: + + // Uses the regular Xher2 routine + using Xher2<T>::DoHer2; + + // Constructor + Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2"); + + // Templated-precision implementation of the routine + StatusCode DoSyr2(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XSYR2_H_ +#endif diff --git a/src/routines/level2/xtbmv.cc b/src/routines/level2/xtbmv.cpp index 47371c87..e315c544 100644 --- a/src/routines/level2/xtbmv.cc +++ b/src/routines/level2/xtbmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include 
"internal/routines/level2/xtbmv.h" +#include "routines/level2/xtbmv.hpp" #include <string> #include <vector> @@ -72,6 +72,7 @@ StatusCode Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xtbmv<half>; template class Xtbmv<float>; template class Xtbmv<double>; template class Xtbmv<float2>; diff --git a/src/routines/level2/xtbmv.hpp b/src/routines/level2/xtbmv.hpp new file mode 100644 index 00000000..389e9705 --- /dev/null +++ b/src/routines/level2/xtbmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtbmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtbmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTBMV_H_ +#define CLBLAST_ROUTINES_XTBMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// Xtbmv<T> derives from Xgemv<T>; see comment at top of file for a description of the class +template <typename T> +class Xtbmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine: exposes Xgemv's queue_, context_, and MatVec here + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor: takes the queue and the event; the routine name defaults to "TBMV" + Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV"); + + // Templated-precision implementation of the routine; a and x are both passed as Buffer<T> + StatusCode DoTbmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, const size_t k, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTBMV_H_ +#endif diff --git a/src/routines/level2/xtpmv.cc b/src/routines/level2/xtpmv.cpp index c63cb9b2..46811089 100644 --- a/src/routines/level2/xtpmv.cc +++ b/src/routines/level2/xtpmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xtpmv.h" +#include "routines/level2/xtpmv.hpp" #include <string> #include <vector> @@ -72,6 +72,7 @@ StatusCode Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xtpmv<half>; template class Xtpmv<float>; template class Xtpmv<double>; template class Xtpmv<float2>; diff --git 
a/src/routines/level2/xtpmv.hpp b/src/routines/level2/xtpmv.hpp new file mode 100644 index 00000000..0e8cf1d2 --- /dev/null +++ b/src/routines/level2/xtpmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtpmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtpmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTPMV_H_ +#define CLBLAST_ROUTINES_XTPMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// Xtpmv<T> derives from Xgemv<T>; see comment at top of file for a description of the class +template <typename T> +class Xtpmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine: exposes Xgemv's queue_, context_, and MatVec here + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor: takes the queue and the event; the routine name defaults to "TPMV" + Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV"); + + // Templated-precision implementation of the routine; ap and x are both passed as Buffer<T> + StatusCode DoTpmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &ap_buffer, const size_t ap_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTPMV_H_ 
+#endif diff --git a/src/routines/level2/xtrmv.cc b/src/routines/level2/xtrmv.cpp index 9111d41d..d2f24252 100644 --- a/src/routines/level2/xtrmv.cc +++ b/src/routines/level2/xtrmv.cpp @@ -11,7 +11,7 @@ // // ================================================================================================= -#include "internal/routines/level2/xtrmv.h" +#include "routines/level2/xtrmv.hpp" #include <string> #include <vector> @@ -72,6 +72,7 @@ StatusCode Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle, // ================================================================================================= // Compiles the templated class +template class Xtrmv<half>; template class Xtrmv<float>; template class Xtrmv<double>; template class Xtrmv<float2>; diff --git a/src/routines/level2/xtrmv.hpp b/src/routines/level2/xtrmv.hpp new file mode 100644 index 00000000..07dd7841 --- /dev/null +++ b/src/routines/level2/xtrmv.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtrmv routine. It is based on the generalized mat-vec multiplication +// routine (Xgemv). The Xtrmv class inherits from the templated class Xgemv, allowing it to call the +// "MatVec" function directly. 
+// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTRMV_H_ +#define CLBLAST_ROUTINES_XTRMV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// Xtrmv<T> derives from Xgemv<T>; see comment at top of file for a description of the class +template <typename T> +class Xtrmv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine: exposes Xgemv's queue_, context_, and MatVec here + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor: takes the queue and the event; the routine name defaults to "TRMV" + Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV"); + + // Templated-precision implementation of the routine; a and x are both passed as Buffer<T> + StatusCode DoTrmv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTRMV_H_ +#endif |