summary refs log tree commit diff
path: root/src/routines/level2
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level2')
-rw-r--r--src/routines/level2/xgbmv.cpp (renamed from src/routines/level2/xgbmv.cc)3
-rw-r--r--src/routines/level2/xgbmv.hpp49
-rw-r--r--src/routines/level2/xgemv.cpp (renamed from src/routines/level2/xgemv.cc)35
-rw-r--r--src/routines/level2/xgemv.hpp56
-rw-r--r--src/routines/level2/xger.cpp (renamed from src/routines/level2/xger.cc)31
-rw-r--r--src/routines/level2/xger.hpp43
-rw-r--r--src/routines/level2/xgerc.cpp (renamed from src/routines/level2/xgerc.cc)2
-rw-r--r--src/routines/level2/xgerc.hpp46
-rw-r--r--src/routines/level2/xgeru.cpp (renamed from src/routines/level2/xgeru.cc)2
-rw-r--r--src/routines/level2/xgeru.hpp46
-rw-r--r--src/routines/level2/xhbmv.cpp (renamed from src/routines/level2/xhbmv.cc)2
-rw-r--r--src/routines/level2/xhbmv.hpp49
-rw-r--r--src/routines/level2/xhemv.cpp (renamed from src/routines/level2/xhemv.cc)2
-rw-r--r--src/routines/level2/xhemv.hpp49
-rw-r--r--src/routines/level2/xher.cpp (renamed from src/routines/level2/xher.cc)38
-rw-r--r--src/routines/level2/xher.hpp46
-rw-r--r--src/routines/level2/xher2.cpp (renamed from src/routines/level2/xher2.cc)33
-rw-r--r--src/routines/level2/xher2.hpp44
-rw-r--r--src/routines/level2/xhpmv.cpp (renamed from src/routines/level2/xhpmv.cc)2
-rw-r--r--src/routines/level2/xhpmv.hpp49
-rw-r--r--src/routines/level2/xhpr.cpp (renamed from src/routines/level2/xhpr.cc)2
-rw-r--r--src/routines/level2/xhpr.hpp45
-rw-r--r--src/routines/level2/xhpr2.cpp (renamed from src/routines/level2/xhpr2.cc)2
-rw-r--r--src/routines/level2/xhpr2.hpp46
-rw-r--r--src/routines/level2/xsbmv.cpp (renamed from src/routines/level2/xsbmv.cc)3
-rw-r--r--src/routines/level2/xsbmv.hpp49
-rw-r--r--src/routines/level2/xspmv.cpp (renamed from src/routines/level2/xspmv.cc)3
-rw-r--r--src/routines/level2/xspmv.hpp49
-rw-r--r--src/routines/level2/xspr.cpp (renamed from src/routines/level2/xspr.cc)3
-rw-r--r--src/routines/level2/xspr.hpp45
-rw-r--r--src/routines/level2/xspr2.cpp (renamed from src/routines/level2/xspr2.cc)3
-rw-r--r--src/routines/level2/xspr2.hpp46
-rw-r--r--src/routines/level2/xsymv.cpp (renamed from src/routines/level2/xsymv.cc)3
-rw-r--r--src/routines/level2/xsymv.hpp49
-rw-r--r--src/routines/level2/xsyr.cpp (renamed from src/routines/level2/xsyr.cc)3
-rw-r--r--src/routines/level2/xsyr.hpp45
-rw-r--r--src/routines/level2/xsyr2.cpp (renamed from src/routines/level2/xsyr2.cc)3
-rw-r--r--src/routines/level2/xsyr2.hpp46
-rw-r--r--src/routines/level2/xtbmv.cpp (renamed from src/routines/level2/xtbmv.cc)3
-rw-r--r--src/routines/level2/xtbmv.hpp49
-rw-r--r--src/routines/level2/xtpmv.cpp (renamed from src/routines/level2/xtpmv.cc)3
-rw-r--r--src/routines/level2/xtpmv.hpp49
-rw-r--r--src/routines/level2/xtrmv.cpp (renamed from src/routines/level2/xtrmv.cc)3
-rw-r--r--src/routines/level2/xtrmv.hpp49
44 files changed, 1137 insertions, 91 deletions
diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cpp
index f90e26b2..ea4f001c 100644
--- a/src/routines/level2/xgbmv.cc
+++ b/src/routines/level2/xgbmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xgbmv.h"
+#include "routines/level2/xgbmv.hpp"
#include <string>
#include <vector>
@@ -58,6 +58,7 @@ StatusCode Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose,
// =================================================================================================
// Compiles the templated class
+template class Xgbmv<half>;
template class Xgbmv<float>;
template class Xgbmv<double>;
template class Xgbmv<float2>;
diff --git a/src/routines/level2/xgbmv.hpp b/src/routines/level2/xgbmv.hpp
new file mode 100644
index 00000000..686ab642
--- /dev/null
+++ b/src/routines/level2/xgbmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgbmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xgbmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGBMV_H_
+#define CLBLAST_ROUTINES_XGBMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgbmv routine class: derives from the templated Xgemv<T> so that it can call "MatVec" directly
+template <typename T>
+class Xgbmv: public Xgemv<T> {
+ public:
+
+ // Makes the generic matrix-vector routine of the Xgemv<T> base class available by name
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "GBMV")
+ Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV");
+
+ // Templated-precision implementation: T types alpha, beta and the buffers; returns a StatusCode
+ StatusCode DoGbmv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGBMV_H_
+#endif
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cpp
index f8985038..21fb397c 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xgemv.h"
+#include "routines/level2/xgemv.hpp"
#include <string>
#include <vector>
@@ -19,18 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xgemv<float>::precision_ = Precision::kSingle;
-template <> const Precision Xgemv<double>::precision_ = Precision::kDouble;
-template <> const Precision Xgemv<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Pad", "Xgemv"}, precision_) {
+ Routine(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/xgemv.opencl"
#include "../../kernels/level2/xgemv_fast.opencl"
@@ -100,12 +92,12 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Tests the matrix and the vectors for validity
auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); }
- else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); }
+ if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
+ else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); }
if (ErrorIn(status)) { return status; }
- status = TestVectorX(n_real, x_buffer, x_offset, x_inc, sizeof(T));
+ status = TestVectorX(n_real, x_buffer, x_offset, x_inc);
if (ErrorIn(status)) { return status; }
- status = TestVectorY(m_real, y_buffer, y_offset, y_inc, sizeof(T));
+ status = TestVectorY(m_real, y_buffer, y_offset, y_inc);
if (ErrorIn(status)) { return status; }
// Determines whether or not the fast-version can be used
@@ -134,16 +126,22 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
local_size = db_["WGS3"];
}
+ // Upload the scalar arguments as constant buffers to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ auto beta_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+ beta_buffer.Write(queue_, 1, &beta);
+
// Retrieves the Xgemv kernel from the compiled binary
try {
- const auto program = GetProgramFromCache();
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(m_real));
kernel.SetArgument(1, static_cast<int>(n_real));
- kernel.SetArgument(2, alpha);
- kernel.SetArgument(3, beta);
+ kernel.SetArgument(2, alpha_buffer());
+ kernel.SetArgument(3, beta_buffer());
kernel.SetArgument(4, static_cast<int>(a_rotated));
kernel.SetArgument(5, a_buffer());
kernel.SetArgument(6, static_cast<int>(a_offset));
@@ -162,7 +160,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Launches the kernel
auto global = std::vector<size_t>{global_size};
auto local = std::vector<size_t>{local_size};
- status = RunKernel(kernel, global, local, event_);
+ status = RunKernel(kernel, queue_, device_, global, local, event_);
if (ErrorIn(status)) { return status; }
// Successfully finished the computation
@@ -173,6 +171,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// =================================================================================================
// Compiles the templated class
+template class Xgemv<half>;
template class Xgemv<float>;
template class Xgemv<double>;
template class Xgemv<float2>;
diff --git a/src/routines/level2/xgemv.hpp b/src/routines/level2/xgemv.hpp
new file mode 100644
index 00000000..e9afec8d
--- /dev/null
+++ b/src/routines/level2/xgemv.hpp
@@ -0,0 +1,56 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgemv routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGEMV_H_
+#define CLBLAST_ROUTINES_XGEMV_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgemv routine class: derives from Routine, the precision is set by the template argument T
+template <typename T>
+class Xgemv: public Routine {
+ public:
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "GEMV")
+ Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV");
+
+ // Templated-precision implementation: T types alpha, beta and the buffers; returns a StatusCode
+ StatusCode DoGemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+
+ // Generic version, also used by derived matrix-vector routines that pull in "MatVec" by name
+ StatusCode MatVec(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ bool fast_kernel, bool fast_kernel_rot,
+ const size_t parameter, const bool packed,
+ const size_t kl, const size_t ku);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGEMV_H_
+#endif
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cpp
index 686c7e60..353047d2 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xger.h"
+#include "routines/level2/xger.hpp"
#include <string>
#include <vector>
@@ -19,18 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xger<float>::precision_ = Precision::kSingle;
-template <> const Precision Xger<double>::precision_ = Precision::kDouble;
-template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xger<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xger.opencl"
@@ -57,22 +49,26 @@ StatusCode Xger<T>::DoGer(const Layout layout,
const auto a_two = (a_is_rowmajor) ? m : n;
// Tests the matrix and the vectors for validity
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T));
+ auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
if (ErrorIn(status)) { return status; }
- status = TestVectorX(m, x_buffer, x_offset, x_inc, sizeof(T));
+ status = TestVectorX(m, x_buffer, x_offset, x_inc);
if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T));
+ status = TestVectorY(n, y_buffer, y_offset, y_inc);
if (ErrorIn(status)) { return status; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache();
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xger");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(a_one));
kernel.SetArgument(1, static_cast<int>(a_two));
- kernel.SetArgument(2, alpha);
+ kernel.SetArgument(2, alpha_buffer());
kernel.SetArgument(3, x_buffer());
kernel.SetArgument(4, static_cast<int>(x_offset));
kernel.SetArgument(5, static_cast<int>(x_inc));
@@ -89,7 +85,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]);
auto global = std::vector<size_t>{a_one_ceiled, a_two_ceiled};
auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, global, local, event_);
+ status = RunKernel(kernel, queue_, device_, global, local, event_);
if (ErrorIn(status)) { return status; }
// Successfully finished the computation
@@ -100,6 +96,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// =================================================================================================
// Compiles the templated class
+template class Xger<half>;
template class Xger<float>;
template class Xger<double>;
template class Xger<float2>;
diff --git a/src/routines/level2/xger.hpp b/src/routines/level2/xger.hpp
new file mode 100644
index 00000000..3c6abe44
--- /dev/null
+++ b/src/routines/level2/xger.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xger routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGER_H_
+#define CLBLAST_ROUTINES_XGER_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xger routine class: derives from Routine, the precision is set by the template argument T
+template <typename T>
+class Xger: public Routine {
+ public:
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "GER")
+ Xger(Queue &queue, EventPointer event, const std::string &name = "GER");
+
+ // Templated-precision implementation: T types alpha and the buffers; returns a StatusCode
+ StatusCode DoGer(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGER_H_
+#endif
diff --git a/src/routines/level2/xgerc.cc b/src/routines/level2/xgerc.cpp
index 73284b52..d9feda97 100644
--- a/src/routines/level2/xgerc.cc
+++ b/src/routines/level2/xgerc.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xgerc.h"
+#include "routines/level2/xgerc.hpp"
#include <string>
diff --git a/src/routines/level2/xgerc.hpp b/src/routines/level2/xgerc.hpp
new file mode 100644
index 00000000..f1d04dfd
--- /dev/null
+++ b/src/routines/level2/xgerc.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgerc routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGERC_H_
+#define CLBLAST_ROUTINES_XGERC_H_
+
+#include "routines/level2/xger.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgerc routine class: derives from the templated Xger<T> and re-uses its "DoGer" member
+template <typename T>
+class Xgerc: public Xger<T> {
+ public:
+
+ // Makes the regular Xger routine of the Xger<T> base class available by name
+ using Xger<T>::DoGer;
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "GERC")
+ Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC");
+
+ // Templated-precision implementation: T types alpha and the buffers; returns a StatusCode
+ StatusCode DoGerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGERC_H_
+#endif
diff --git a/src/routines/level2/xgeru.cc b/src/routines/level2/xgeru.cpp
index 7730d6a5..da9e91c2 100644
--- a/src/routines/level2/xgeru.cc
+++ b/src/routines/level2/xgeru.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xgeru.h"
+#include "routines/level2/xgeru.hpp"
#include <string>
diff --git a/src/routines/level2/xgeru.hpp b/src/routines/level2/xgeru.hpp
new file mode 100644
index 00000000..fb50e917
--- /dev/null
+++ b/src/routines/level2/xgeru.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xgeru routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XGERU_H_
+#define CLBLAST_ROUTINES_XGERU_H_
+
+#include "routines/level2/xger.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xgeru routine class: derives from the templated Xger<T> and re-uses its "DoGer" member
+template <typename T>
+class Xgeru: public Xger<T> {
+ public:
+
+ // Makes the regular Xger routine of the Xger<T> base class available by name
+ using Xger<T>::DoGer;
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "GERU")
+ Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU");
+
+ // Templated-precision implementation: T types alpha and the buffers; returns a StatusCode
+ StatusCode DoGeru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XGERU_H_
+#endif
diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cpp
index 58591b50..f6c0e3c4 100644
--- a/src/routines/level2/xhbmv.cc
+++ b/src/routines/level2/xhbmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xhbmv.h"
+#include "routines/level2/xhbmv.hpp"
#include <string>
#include <vector>
diff --git a/src/routines/level2/xhbmv.hpp b/src/routines/level2/xhbmv.hpp
new file mode 100644
index 00000000..d668eb88
--- /dev/null
+++ b/src/routines/level2/xhbmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhbmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xhbmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHBMV_H_
+#define CLBLAST_ROUTINES_XHBMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhbmv routine class: derives from the templated Xgemv<T> so that it can call "MatVec" directly
+template <typename T>
+class Xhbmv: public Xgemv<T> {
+ public:
+
+ // Makes the generic matrix-vector routine of the Xgemv<T> base class available by name
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "HBMV")
+ Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV");
+
+ // Templated-precision implementation: T types alpha, beta and the buffers; returns a StatusCode
+ StatusCode DoHbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHBMV_H_
+#endif
diff --git a/src/routines/level2/xhemv.cc b/src/routines/level2/xhemv.cpp
index b4ef0fa4..2cbcf7b4 100644
--- a/src/routines/level2/xhemv.cc
+++ b/src/routines/level2/xhemv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xhemv.h"
+#include "routines/level2/xhemv.hpp"
#include <string>
#include <vector>
diff --git a/src/routines/level2/xhemv.hpp b/src/routines/level2/xhemv.hpp
new file mode 100644
index 00000000..8e062fd3
--- /dev/null
+++ b/src/routines/level2/xhemv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhemv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xhemv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHEMV_H_
+#define CLBLAST_ROUTINES_XHEMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhemv routine class: derives from the templated Xgemv<T> so that it can call "MatVec" directly
+template <typename T>
+class Xhemv: public Xgemv<T> {
+ public:
+
+ // Makes the generic matrix-vector routine of the Xgemv<T> base class available by name
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes the queue, an event pointer, and the routine name (defaults to "HEMV")
+ Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV");
+
+ // Templated-precision implementation: T types alpha, beta and the buffers; returns a StatusCode
+ StatusCode DoHemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHEMV_H_
+#endif
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cpp
index a7116213..ed8ba9e9 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cpp
@@ -11,25 +11,17 @@
//
// =================================================================================================
-#include "internal/routines/level2/xher.h"
+#include "routines/level2/xher.hpp"
#include <string>
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xher<float, float>::precision_ = Precision::kSingle;
-template <> const Precision Xher<double, double>::precision_ = Precision::kDouble;
-template <> const Precision Xher<float2, float>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xher<double2, double>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher.opencl"
@@ -43,6 +35,7 @@ template <> float2 Xher<float2,float>::GetAlpha(const float alpha) { return floa
template <> double2 Xher<double2,double>::GetAlpha(const double alpha) { return double2{alpha, 0.0}; }
template <> float Xher<float,float>::GetAlpha(const float alpha) { return alpha; }
template <> double Xher<double,double>::GetAlpha(const double alpha) { return alpha; }
+template <> half Xher<half,half>::GetAlpha(const half alpha) { return alpha; }
// =================================================================================================
@@ -63,28 +56,32 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
(triangle == Triangle::kLower && layout == Layout::kRowMajor));
const auto is_rowmajor = (layout == Layout::kRowMajor);
- // Creates a matching version of alpha
- const auto matching_alpha = GetAlpha(alpha);
-
// Tests the matrix and the vectors for validity
auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); }
- else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); }
+ if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
+ else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
if (ErrorIn(status)) { return status; }
- status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T));
+ status = TestVectorX(n, x_buffer, x_offset, x_inc);
if (ErrorIn(status)) { return status; }
// If alpha is zero an update is not required
if (alpha == U{0}) { return StatusCode::kSuccess; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Creates a matching version of alpha
+ const auto matching_alpha = GetAlpha(alpha);
+
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &matching_alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache();
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xher");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, matching_alpha);
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));
@@ -99,7 +96,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
auto global = std::vector<size_t>{global_one, global_two};
auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, global, local, event_);
+ status = RunKernel(kernel, queue_, device_, global, local, event_);
if (ErrorIn(status)) { return status; }
// Successfully finished the computation
@@ -110,6 +107,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xher<half, half>;
template class Xher<float, float>;
template class Xher<double, double>;
template class Xher<float2, float>;
diff --git a/src/routines/level2/xher.hpp b/src/routines/level2/xher.hpp
new file mode 100644
index 00000000..9ff6bf3f
--- /dev/null
+++ b/src/routines/level2/xher.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xher routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHER_H_
+#define CLBLAST_ROUTINES_XHER_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xher routine class, derived from Routine: T types the buffers, U types the scalar alpha
+template <typename T, typename U>
+class Xher: public Routine {
+ public:
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "HER")
+ Xher(Queue &queue, EventPointer event, const std::string &name = "HER");
+
+ // Translates the scalar alpha from type 'U' into the buffer element type 'T'
+ T GetAlpha(const U alpha);
+
+ // Templated-precision implementation; returns a StatusCode, 'packed' defaults to false
+ StatusCode DoHer(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed = false);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHER_H_
+#endif
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cpp
index 3fd1a961..50572cea 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cpp
@@ -11,25 +11,17 @@
//
// =================================================================================================
-#include "internal/routines/level2/xher2.h"
+#include "routines/level2/xher2.hpp"
#include <string>
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xher2<float>::precision_ = Precision::kSingle;
-template <> const Precision Xher2<double>::precision_ = Precision::kDouble;
-template <> const Precision Xher2<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xher2<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher2.opencl"
@@ -58,22 +50,26 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// Tests the matrix and the vectors for validity
auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); }
- else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); }
+ if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
+ else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
if (ErrorIn(status)) { return status; }
- status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T));
+ status = TestVectorX(n, x_buffer, x_offset, x_inc);
if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T));
+ status = TestVectorY(n, y_buffer, y_offset, y_inc);
if (ErrorIn(status)) { return status; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache();
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xher2");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));
@@ -91,7 +87,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
auto global = std::vector<size_t>{global_one, global_two};
auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, global, local, event_);
+ status = RunKernel(kernel, queue_, device_, global, local, event_);
if (ErrorIn(status)) { return status; }
// Succesfully finished the computation
@@ -102,6 +98,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xher2<half>;
template class Xher2<float>;
template class Xher2<double>;
template class Xher2<float2>;
diff --git a/src/routines/level2/xher2.hpp b/src/routines/level2/xher2.hpp
new file mode 100644
index 00000000..8c53c047
--- /dev/null
+++ b/src/routines/level2/xher2.hpp
@@ -0,0 +1,44 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xher2 routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHER2_H_
+#define CLBLAST_ROUTINES_XHER2_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xher2 routine class, derived from Routine: T types both the buffers and the scalar alpha
+template <typename T>
+class Xher2: public Routine {
+ public:
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "HER2")
+ Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2");
+
+ // Templated-precision implementation; returns a StatusCode, 'packed' defaults to false
+ StatusCode DoHer2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed = false);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHER2_H_
+#endif
diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cpp
index 92686dbe..e6f82b34 100644
--- a/src/routines/level2/xhpmv.cc
+++ b/src/routines/level2/xhpmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xhpmv.h"
+#include "routines/level2/xhpmv.hpp"
#include <string>
#include <vector>
diff --git a/src/routines/level2/xhpmv.hpp b/src/routines/level2/xhpmv.hpp
new file mode 100644
index 00000000..b11192f9
--- /dev/null
+++ b/src/routines/level2/xhpmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhpmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xhpmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHPMV_H_
+#define CLBLAST_ROUTINES_XHPMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhpmv routine class, derived from Xgemv<T> so that it can call "MatVec" (see top of file)
+template <typename T>
+class Xhpmv: public Xgemv<T> {
+ public:
+
+ // Uses the generic matrix-vector routine (MatVec from the dependent base, made visible here)
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "HPMV")
+ Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoHpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHPMV_H_
+#endif
diff --git a/src/routines/level2/xhpr.cc b/src/routines/level2/xhpr.cpp
index 4b31ad09..225ebfe5 100644
--- a/src/routines/level2/xhpr.cc
+++ b/src/routines/level2/xhpr.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xhpr.h"
+#include "routines/level2/xhpr.hpp"
#include <string>
diff --git a/src/routines/level2/xhpr.hpp b/src/routines/level2/xhpr.hpp
new file mode 100644
index 00000000..37801c68
--- /dev/null
+++ b/src/routines/level2/xhpr.hpp
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhpr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHPR_H_
+#define CLBLAST_ROUTINES_XHPR_H_
+
+#include "routines/level2/xher.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhpr routine class, derived from Xher<T,U>: T types the buffers, U types the scalar alpha
+template <typename T, typename U>
+class Xhpr: public Xher<T,U> {
+ public:
+
+ // Uses the regular Xher routine (DoHer from the dependent base, made visible here)
+ using Xher<T,U>::DoHer;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "HPR")
+ Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoHpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHPR_H_
+#endif
diff --git a/src/routines/level2/xhpr2.cc b/src/routines/level2/xhpr2.cpp
index 9be24f43..85f9d3f9 100644
--- a/src/routines/level2/xhpr2.cc
+++ b/src/routines/level2/xhpr2.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xhpr2.h"
+#include "routines/level2/xhpr2.hpp"
#include <string>
diff --git a/src/routines/level2/xhpr2.hpp b/src/routines/level2/xhpr2.hpp
new file mode 100644
index 00000000..d66dce55
--- /dev/null
+++ b/src/routines/level2/xhpr2.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xhpr2 routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XHPR2_H_
+#define CLBLAST_ROUTINES_XHPR2_H_
+
+#include "routines/level2/xher2.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xhpr2 routine class, derived from Xher2<T>: T types both the buffers and the scalar alpha
+template <typename T>
+class Xhpr2: public Xher2<T> {
+ public:
+
+ // Uses the regular Xher2 routine (DoHer2 from the dependent base, made visible here)
+ using Xher2<T>::DoHer2;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "HPR2")
+ Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoHpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XHPR2_H_
+#endif
diff --git a/src/routines/level2/xsbmv.cc b/src/routines/level2/xsbmv.cpp
index bc82c88d..28730899 100644
--- a/src/routines/level2/xsbmv.cc
+++ b/src/routines/level2/xsbmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xsbmv.h"
+#include "routines/level2/xsbmv.hpp"
#include <string>
#include <vector>
@@ -57,6 +57,7 @@ StatusCode Xsbmv<T>::DoSbmv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsbmv<half>;
template class Xsbmv<float>;
template class Xsbmv<double>;
diff --git a/src/routines/level2/xsbmv.hpp b/src/routines/level2/xsbmv.hpp
new file mode 100644
index 00000000..16c5e9a8
--- /dev/null
+++ b/src/routines/level2/xsbmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xsbmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xsbmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSBMV_H_
+#define CLBLAST_ROUTINES_XSBMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xsbmv routine class, derived from Xgemv<T> so that it can call "MatVec" (see top of file)
+template <typename T>
+class Xsbmv: public Xgemv<T> {
+ public:
+
+ // Uses the generic matrix-vector routine (MatVec from the dependent base, made visible here)
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SBMV")
+ Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV");
+
+ // Templated-precision implementation; returns a StatusCode, takes two sizes ('n' and 'k')
+ StatusCode DoSbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSBMV_H_
+#endif
diff --git a/src/routines/level2/xspmv.cc b/src/routines/level2/xspmv.cpp
index 6e00dcfa..f6651012 100644
--- a/src/routines/level2/xspmv.cc
+++ b/src/routines/level2/xspmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xspmv.h"
+#include "routines/level2/xspmv.hpp"
#include <string>
#include <vector>
@@ -57,6 +57,7 @@ StatusCode Xspmv<T>::DoSpmv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xspmv<half>;
template class Xspmv<float>;
template class Xspmv<double>;
diff --git a/src/routines/level2/xspmv.hpp b/src/routines/level2/xspmv.hpp
new file mode 100644
index 00000000..a0c69b85
--- /dev/null
+++ b/src/routines/level2/xspmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xspmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xspmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSPMV_H_
+#define CLBLAST_ROUTINES_XSPMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xspmv routine class, derived from Xgemv<T> so that it can call "MatVec" (see top of file)
+template <typename T>
+class Xspmv: public Xgemv<T> {
+ public:
+
+ // Uses the generic matrix-vector routine (MatVec from the dependent base, made visible here)
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SPMV")
+ Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoSpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSPMV_H_
+#endif
diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cpp
index 55af2f29..a75fe9c3 100644
--- a/src/routines/level2/xspr.cc
+++ b/src/routines/level2/xspr.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xspr.h"
+#include "routines/level2/xspr.hpp"
#include <string>
@@ -44,6 +44,7 @@ StatusCode Xspr<T>::DoSpr(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xspr<half>;
template class Xspr<float>;
template class Xspr<double>;
diff --git a/src/routines/level2/xspr.hpp b/src/routines/level2/xspr.hpp
new file mode 100644
index 00000000..6468c736
--- /dev/null
+++ b/src/routines/level2/xspr.hpp
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xspr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSPR_H_
+#define CLBLAST_ROUTINES_XSPR_H_
+
+#include "routines/level2/xher.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xspr routine class, derived from Xher<T,T>: buffers and the scalar alpha share the type T
+template <typename T>
+class Xspr: public Xher<T,T> {
+ public:
+
+ // Uses the regular Xher routine (DoHer from the dependent base, made visible here)
+ using Xher<T,T>::DoHer;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SPR")
+ Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoSpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSPR_H_
+#endif
diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cpp
index 9a3f97ce..c39a2eb4 100644
--- a/src/routines/level2/xspr2.cc
+++ b/src/routines/level2/xspr2.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xspr2.h"
+#include "routines/level2/xspr2.hpp"
#include <string>
@@ -46,6 +46,7 @@ StatusCode Xspr2<T>::DoSpr2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xspr2<half>;
template class Xspr2<float>;
template class Xspr2<double>;
diff --git a/src/routines/level2/xspr2.hpp b/src/routines/level2/xspr2.hpp
new file mode 100644
index 00000000..693c56a1
--- /dev/null
+++ b/src/routines/level2/xspr2.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xspr2 routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSPR2_H_
+#define CLBLAST_ROUTINES_XSPR2_H_
+
+#include "routines/level2/xher2.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xspr2 routine class, derived from Xher2<T>: T types both the buffers and the scalar alpha
+template <typename T>
+class Xspr2: public Xher2<T> {
+ public:
+
+ // Uses the regular Xher2 routine (DoHer2 from the dependent base, made visible here)
+ using Xher2<T>::DoHer2;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SPR2")
+ Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2");
+
+ // Templated-precision implementation; 'ap_buffer' takes an offset only (no '_ld' argument)
+ StatusCode DoSpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSPR2_H_
+#endif
diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cpp
index a9eb284f..648d2a3e 100644
--- a/src/routines/level2/xsymv.cc
+++ b/src/routines/level2/xsymv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xsymv.h"
+#include "routines/level2/xsymv.hpp"
#include <string>
#include <vector>
@@ -57,6 +57,7 @@ StatusCode Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsymv<half>;
template class Xsymv<float>;
template class Xsymv<double>;
diff --git a/src/routines/level2/xsymv.hpp b/src/routines/level2/xsymv.hpp
new file mode 100644
index 00000000..67815f2f
--- /dev/null
+++ b/src/routines/level2/xsymv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xsymv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xsymv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSYMV_H_
+#define CLBLAST_ROUTINES_XSYMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xsymv routine class, derived from Xgemv<T> so that it can call "MatVec" (see top of file)
+template <typename T>
+class Xsymv: public Xgemv<T> {
+ public:
+
+ // Uses the generic matrix-vector routine (MatVec from the dependent base, made visible here)
+ using Xgemv<T>::MatVec;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SYMV")
+ Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV");
+
+ // Templated-precision implementation of the routine; returns a StatusCode
+ StatusCode DoSymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSYMV_H_
+#endif
diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cpp
index 4b3928e5..758d8f8f 100644
--- a/src/routines/level2/xsyr.cc
+++ b/src/routines/level2/xsyr.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xsyr.h"
+#include "routines/level2/xsyr.hpp"
#include <string>
@@ -43,6 +43,7 @@ StatusCode Xsyr<T>::DoSyr(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsyr<half>;
template class Xsyr<float>;
template class Xsyr<double>;
diff --git a/src/routines/level2/xsyr.hpp b/src/routines/level2/xsyr.hpp
new file mode 100644
index 00000000..20393454
--- /dev/null
+++ b/src/routines/level2/xsyr.hpp
@@ -0,0 +1,45 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xsyr routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSYR_H_
+#define CLBLAST_ROUTINES_XSYR_H_
+
+#include "routines/level2/xher.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xsyr routine class, derived from Xher<T,T>: buffers and the scalar alpha share the type T
+template <typename T>
+class Xsyr: public Xher<T,T> {
+ public:
+
+ // Uses the regular Xher routine (DoHer from the dependent base, made visible here)
+ using Xher<T,T>::DoHer;
+
+ // Constructor: takes a queue, an event pointer and a routine name (defaults to "SYR")
+ Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR");
+
+ // Templated-precision implementation of the routine; returns a StatusCode
+ StatusCode DoSyr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSYR_H_
+#endif
diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cpp
index 3ae389e0..6f43b219 100644
--- a/src/routines/level2/xsyr2.cc
+++ b/src/routines/level2/xsyr2.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xsyr2.h"
+#include "routines/level2/xsyr2.hpp"
#include <string>
@@ -45,6 +45,7 @@ StatusCode Xsyr2<T>::DoSyr2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsyr2<half>;
template class Xsyr2<float>;
template class Xsyr2<double>;
diff --git a/src/routines/level2/xsyr2.hpp b/src/routines/level2/xsyr2.hpp
new file mode 100644
index 00000000..1a8dcbe8
--- /dev/null
+++ b/src/routines/level2/xsyr2.hpp
@@ -0,0 +1,46 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file declares the Xsyr2 routine class. The precision is selected using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSYR2_H_
+#define CLBLAST_ROUTINES_XSYR2_H_
+
+#include "routines/level2/xher2.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xsyr2 derives from Xher2<T> and adds the DoSyr2 entry point; see the top-of-file comment
+template <typename T>
+class Xsyr2: public Xher2<T> {
+ public:
+
+ // Re-exposes the DoHer2 member function inherited from Xher2<T> within this class
+ using Xher2<T>::DoHer2;
+
+ // Constructor taking the queue and event to use; the 'name' argument defaults to "SYR2"
+ Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2");
+
+ // Templated-precision entry point of the routine (defined in xsyr2.cpp); returns a StatusCode
+ StatusCode DoSyr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSYR2_H_
+#endif
diff --git a/src/routines/level2/xtbmv.cc b/src/routines/level2/xtbmv.cpp
index 47371c87..e315c544 100644
--- a/src/routines/level2/xtbmv.cc
+++ b/src/routines/level2/xtbmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xtbmv.h"
+#include "routines/level2/xtbmv.hpp"
#include <string>
#include <vector>
@@ -72,6 +72,7 @@ StatusCode Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xtbmv<half>;
template class Xtbmv<float>;
template class Xtbmv<double>;
template class Xtbmv<float2>;
diff --git a/src/routines/level2/xtbmv.hpp b/src/routines/level2/xtbmv.hpp
new file mode 100644
index 00000000..389e9705
--- /dev/null
+++ b/src/routines/level2/xtbmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xtbmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xtbmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XTBMV_H_
+#define CLBLAST_ROUTINES_XTBMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xtbmv derives from Xgemv<T> so that it can call MatVec; see the top-of-file comment
+template <typename T>
+class Xtbmv: public Xgemv<T> {
+ public:
+
+ // Brings Xgemv's queue_ and context_ members and its MatVec function into this class's scope
+ using Xgemv<T>::queue_;
+ using Xgemv<T>::context_;
+ using Xgemv<T>::MatVec;
+
+ // Constructor taking the queue and event to use; the 'name' argument defaults to "TBMV"
+ Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV");
+
+ // Templated-precision entry point (defined in xtbmv.cpp); takes both 'n' and 'k' as sizes
+ StatusCode DoTbmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n, const size_t k,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XTBMV_H_
+#endif
diff --git a/src/routines/level2/xtpmv.cc b/src/routines/level2/xtpmv.cpp
index c63cb9b2..46811089 100644
--- a/src/routines/level2/xtpmv.cc
+++ b/src/routines/level2/xtpmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xtpmv.h"
+#include "routines/level2/xtpmv.hpp"
#include <string>
#include <vector>
@@ -72,6 +72,7 @@ StatusCode Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xtpmv<half>;
template class Xtpmv<float>;
template class Xtpmv<double>;
template class Xtpmv<float2>;
diff --git a/src/routines/level2/xtpmv.hpp b/src/routines/level2/xtpmv.hpp
new file mode 100644
index 00000000..0e8cf1d2
--- /dev/null
+++ b/src/routines/level2/xtpmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xtpmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xtpmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XTPMV_H_
+#define CLBLAST_ROUTINES_XTPMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xtpmv derives from Xgemv<T> so that it can call MatVec; see the top-of-file comment
+template <typename T>
+class Xtpmv: public Xgemv<T> {
+ public:
+
+ // Brings Xgemv's queue_ and context_ members and its MatVec function into this class's scope
+ using Xgemv<T>::queue_;
+ using Xgemv<T>::context_;
+ using Xgemv<T>::MatVec;
+
+ // Constructor taking the queue and event to use; the 'name' argument defaults to "TPMV"
+ Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV");
+
+ // Templated-precision entry point (defined in xtpmv.cpp); 'ap_buffer' comes with an offset only
+ StatusCode DoTpmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XTPMV_H_
+#endif
diff --git a/src/routines/level2/xtrmv.cc b/src/routines/level2/xtrmv.cpp
index 9111d41d..d2f24252 100644
--- a/src/routines/level2/xtrmv.cc
+++ b/src/routines/level2/xtrmv.cpp
@@ -11,7 +11,7 @@
//
// =================================================================================================
-#include "internal/routines/level2/xtrmv.h"
+#include "routines/level2/xtrmv.hpp"
#include <string>
#include <vector>
@@ -72,6 +72,7 @@ StatusCode Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xtrmv<half>;
template class Xtrmv<float>;
template class Xtrmv<double>;
template class Xtrmv<float2>;
diff --git a/src/routines/level2/xtrmv.hpp b/src/routines/level2/xtrmv.hpp
new file mode 100644
index 00000000..07dd7841
--- /dev/null
+++ b/src/routines/level2/xtrmv.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xtrmv routine. It is based on the generalized mat-vec multiplication
+// routine (Xgemv). The Xtrmv class inherits from the templated class Xgemv, allowing it to call the
+// "MatVec" function directly.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XTRMV_H_
+#define CLBLAST_ROUTINES_XTRMV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xtrmv derives from Xgemv<T> so that it can call MatVec; see the top-of-file comment
+template <typename T>
+class Xtrmv: public Xgemv<T> {
+ public:
+
+ // Brings Xgemv's queue_ and context_ members and its MatVec function into this class's scope
+ using Xgemv<T>::queue_;
+ using Xgemv<T>::context_;
+ using Xgemv<T>::MatVec;
+
+ // Constructor taking the queue and event to use; the 'name' argument defaults to "TRMV"
+ Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV");
+
+ // Templated-precision entry point (defined in xtrmv.cpp); 'a_buffer' comes with 'a_ld'
+ StatusCode DoTrmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XTRMV_H_
+#endif