summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/internal/routines/level1/xamax.h 4
-rw-r--r-- include/internal/routines/level1/xasum.h 4
-rw-r--r-- include/internal/routines/level1/xaxpy.h 4
-rw-r--r-- include/internal/routines/level1/xcopy.h 4
-rw-r--r-- include/internal/routines/level1/xdot.h 4
-rw-r--r-- include/internal/routines/level1/xnrm2.h 4
-rw-r--r-- include/internal/routines/level1/xscal.h 4
-rw-r--r-- include/internal/routines/level1/xswap.h 4
-rw-r--r-- include/internal/routines/level2/xgemv.h 4
-rw-r--r-- include/internal/routines/level2/xger.h 4
-rw-r--r-- include/internal/routines/level2/xher.h 4
-rw-r--r-- include/internal/routines/level2/xher2.h 4
-rw-r--r-- include/internal/routines/level3/xher2k.h 4
-rw-r--r-- include/internal/routines/level3/xherk.h 4
-rw-r--r-- include/internal/routines/level3/xsyr2k.h 4
-rw-r--r-- include/internal/routines/level3/xsyrk.h 4
-rw-r--r-- include/internal/routines/levelx/xomatcopy.h 4
-rw-r--r-- include/internal/utilities.h 4
-rw-r--r-- src/routines/level1/xamax.cc 13
-rw-r--r-- src/routines/level1/xasum.cc 13
-rw-r--r-- src/routines/level1/xaxpy.cc 13
-rw-r--r-- src/routines/level1/xcopy.cc 13
-rw-r--r-- src/routines/level1/xdot.cc 13
-rw-r--r-- src/routines/level1/xnrm2.cc 13
-rw-r--r-- src/routines/level1/xscal.cc 13
-rw-r--r-- src/routines/level1/xswap.cc 13
-rw-r--r-- src/routines/level2/xgemv.cc 13
-rw-r--r-- src/routines/level2/xger.cc 13
-rw-r--r-- src/routines/level2/xher.cc 13
-rw-r--r-- src/routines/level2/xher2.cc 13
-rw-r--r-- src/routines/level3/xgemm.cc 13
-rw-r--r-- src/routines/level3/xhemm.cc 2
-rw-r--r-- src/routines/level3/xher2k.cc 10
-rw-r--r-- src/routines/level3/xherk.cc 10
-rw-r--r-- src/routines/level3/xsymm.cc 2
-rw-r--r-- src/routines/level3/xsyr2k.cc 13
-rw-r--r-- src/routines/level3/xsyrk.cc 13
-rw-r--r-- src/routines/level3/xtrmm.cc 2
-rw-r--r-- src/routines/levelx/xomatcopy.cc 13
-rw-r--r-- src/utilities.cc 7
40 files changed, 50 insertions, 263 deletions
diff --git a/include/internal/routines/level1/xamax.h b/include/internal/routines/level1/xamax.h
index ec1de346..8b80044e 100644
--- a/include/internal/routines/level1/xamax.h
+++ b/include/internal/routines/level1/xamax.h
@@ -40,10 +40,6 @@ class Xamax: public Routine<T> {
StatusCode DoAmax(const size_t n,
const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xasum.h b/include/internal/routines/level1/xasum.h
index b6c8e4e6..fe8529e3 100644
--- a/include/internal/routines/level1/xasum.h
+++ b/include/internal/routines/level1/xasum.h
@@ -40,10 +40,6 @@ class Xasum: public Routine<T> {
StatusCode DoAsum(const size_t n,
const Buffer<T> &asum_buffer, const size_t asum_offset,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h
index 71e7c01c..af48086a 100644
--- a/include/internal/routines/level1/xaxpy.h
+++ b/include/internal/routines/level1/xaxpy.h
@@ -40,10 +40,6 @@ class Xaxpy: public Routine<T> {
StatusCode DoAxpy(const size_t n, const T alpha,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h
index de9edaaf..eb245192 100644
--- a/include/internal/routines/level1/xcopy.h
+++ b/include/internal/routines/level1/xcopy.h
@@ -40,10 +40,6 @@ class Xcopy: public Routine<T> {
StatusCode DoCopy(const size_t n,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h
index b70ff3fe..ce26d267 100644
--- a/include/internal/routines/level1/xdot.h
+++ b/include/internal/routines/level1/xdot.h
@@ -42,10 +42,6 @@ class Xdot: public Routine<T> {
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
const bool do_conjugate = false);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h
index 1cb22728..5186acc5 100644
--- a/include/internal/routines/level1/xnrm2.h
+++ b/include/internal/routines/level1/xnrm2.h
@@ -40,10 +40,6 @@ class Xnrm2: public Routine<T> {
StatusCode DoNrm2(const size_t n,
const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h
index c2b2c1bf..6c82dd89 100644
--- a/include/internal/routines/level1/xscal.h
+++ b/include/internal/routines/level1/xscal.h
@@ -39,10 +39,6 @@ class Xscal: public Routine<T> {
// Templated-precision implementation of the routine
StatusCode DoScal(const size_t n, const T alpha,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h
index 45e34dd6..6a568377 100644
--- a/include/internal/routines/level1/xswap.h
+++ b/include/internal/routines/level1/xswap.h
@@ -40,10 +40,6 @@ class Xswap: public Routine<T> {
StatusCode DoSwap(const size_t n,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h
index b28536bd..b1277079 100644
--- a/include/internal/routines/level2/xgemv.h
+++ b/include/internal/routines/level2/xgemv.h
@@ -56,10 +56,6 @@ class Xgemv: public Routine<T> {
bool fast_kernel, bool fast_kernel_rot,
const size_t parameter, const bool packed,
const size_t kl, const size_t ku);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h
index 996e0fc8..aab5075d 100644
--- a/include/internal/routines/level2/xger.h
+++ b/include/internal/routines/level2/xger.h
@@ -43,10 +43,6 @@ class Xger: public Routine<T> {
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h
index a4a25c3c..97ccfde7 100644
--- a/include/internal/routines/level2/xher.h
+++ b/include/internal/routines/level2/xher.h
@@ -46,10 +46,6 @@ class Xher: public Routine<T> {
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const bool packed = false);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h
index de8583f4..067f85e6 100644
--- a/include/internal/routines/level2/xher2.h
+++ b/include/internal/routines/level2/xher2.h
@@ -44,10 +44,6 @@ class Xher2: public Routine<T> {
const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const bool packed = false);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h
index 092d7246..dfeb737e 100644
--- a/include/internal/routines/level3/xher2k.h
+++ b/include/internal/routines/level3/xher2k.h
@@ -46,10 +46,6 @@ class Xher2k: public Routine<T> {
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const U beta,
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h
index b5e2d723..7ea59579 100644
--- a/include/internal/routines/level3/xherk.h
+++ b/include/internal/routines/level3/xherk.h
@@ -45,10 +45,6 @@ class Xherk: public Routine<T> {
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const U beta,
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h
index c7ae1678..aefd016d 100644
--- a/include/internal/routines/level3/xsyr2k.h
+++ b/include/internal/routines/level3/xsyr2k.h
@@ -46,10 +46,6 @@ class Xsyr2k: public Routine<T> {
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h
index 860f8e10..75726496 100644
--- a/include/internal/routines/level3/xsyrk.h
+++ b/include/internal/routines/level3/xsyrk.h
@@ -47,10 +47,6 @@ class Xsyrk: public Routine<T> {
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const T beta,
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/routines/levelx/xomatcopy.h b/include/internal/routines/levelx/xomatcopy.h
index 29f33aac..e5c0529f 100644
--- a/include/internal/routines/levelx/xomatcopy.h
+++ b/include/internal/routines/levelx/xomatcopy.h
@@ -41,10 +41,6 @@ class Xomatcopy: public Routine<T> {
const size_t m, const size_t n, const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
-
- private:
- // Static variable to get the precision
- const static Precision precision_;
};
// =================================================================================================
diff --git a/include/internal/utilities.h b/include/internal/utilities.h
index 26145528..7092bcdd 100644
--- a/include/internal/utilities.h
+++ b/include/internal/utilities.h
@@ -240,6 +240,10 @@ bool IsMultiple(const size_t a, const size_t b);
// Convert the precision enum into bytes, e.g. a double takes up 8 bytes
size_t GetBytes(const Precision precision);
+// Convert the template argument into a precision value
+template <typename T>
+Precision PrecisionValue();
+
// =================================================================================================
// Returns false if this precision is not supported by the device
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index 9e203d03..1a3441ef 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xamax<half>::precision_ = Precision::kHalf;
-template <> const Precision Xamax<float>::precision_ = Precision::kSingle;
-template <> const Precision Xamax<double>::precision_ = Precision::kDouble;
-template <> const Precision Xamax<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xamax<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xdot"}, precision_) {
+ Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/xamax.opencl"
;
@@ -56,7 +47,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n,
// Retrieves the Xamax kernels from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel1 = Kernel(program, "Xamax");
auto kernel2 = Kernel(program, "XamaxEpilogue");
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index f4d898be..85c6e1ed 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xasum<half>::precision_ = Precision::kHalf;
-template <> const Precision Xasum<float>::precision_ = Precision::kSingle;
-template <> const Precision Xasum<double>::precision_ = Precision::kDouble;
-template <> const Precision Xasum<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xasum<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xasum<T>::Xasum(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xdot"}, precision_) {
+ Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/xasum.opencl"
;
@@ -56,7 +47,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n,
// Retrieves the Xasum kernels from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel1 = Kernel(program, "Xasum");
auto kernel2 = Kernel(program, "XasumEpilogue");
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 221e1195..39121067 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xaxpy<half>::precision_ = Precision::kHalf;
-template <> const Precision Xaxpy<float>::precision_ = Precision::kSingle;
-template <> const Precision Xaxpy<double>::precision_ = Precision::kDouble;
-template <> const Precision Xaxpy<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
+ Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xaxpy.opencl"
@@ -65,7 +56,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// Retrieves the Xaxpy kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Upload the scalar argument as a constant buffer to the device (needed for half-precision)
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 647a681a..d85efca3 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xcopy<half>::precision_ = Precision::kHalf;
-template <> const Precision Xcopy<float>::precision_ = Precision::kSingle;
-template <> const Precision Xcopy<double>::precision_ = Precision::kDouble;
-template <> const Precision Xcopy<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xcopy<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
+ Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xcopy.opencl"
@@ -65,7 +56,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
// Retrieves the Xcopy kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index eac64d13..e3a6b1f0 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xdot<half>::precision_ = Precision::kHalf;
-template <> const Precision Xdot<float>::precision_ = Precision::kSingle;
-template <> const Precision Xdot<double>::precision_ = Precision::kDouble;
-template <> const Precision Xdot<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xdot<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xdot"}, precision_) {
+ Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/xdot.opencl"
;
@@ -60,7 +51,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
// Retrieves the Xdot kernels from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel1 = Kernel(program, "Xdot");
auto kernel2 = Kernel(program, "XdotEpilogue");
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index 23055aac..1730e144 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xnrm2<half>::precision_ = Precision::kHalf;
-template <> const Precision Xnrm2<float>::precision_ = Precision::kSingle;
-template <> const Precision Xnrm2<double>::precision_ = Precision::kDouble;
-template <> const Precision Xnrm2<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xnrm2<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xdot"}, precision_) {
+ Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/xnrm2.opencl"
;
@@ -56,7 +47,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
// Retrieves the Xnrm2 kernels from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel1 = Kernel(program, "Xnrm2");
auto kernel2 = Kernel(program, "Xnrm2Epilogue");
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index 22d2cb5b..4792d40b 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xscal<half>::precision_ = Precision::kHalf;
-template <> const Precision Xscal<float>::precision_ = Precision::kSingle;
-template <> const Precision Xscal<double>::precision_ = Precision::kDouble;
-template <> const Precision Xscal<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xscal<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
+ Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xscal.opencl"
@@ -61,7 +52,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
// Retrieves the Xscal kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index b6996932..897515fb 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xswap<half>::precision_ = Precision::kHalf;
-template <> const Precision Xswap<float>::precision_ = Precision::kSingle;
-template <> const Precision Xswap<double>::precision_ = Precision::kDouble;
-template <> const Precision Xswap<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xswap<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xaxpy"}, precision_) {
+ Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xswap.opencl"
@@ -65,7 +56,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
// Retrieves the Xswap kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index b997673b..ea3b245d 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xgemv<half>::precision_ = Precision::kHalf;
-template <> const Precision Xgemv<float>::precision_ = Precision::kSingle;
-template <> const Precision Xgemv<double>::precision_ = Precision::kDouble;
-template <> const Precision Xgemv<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Pad", "Xgemv"}, precision_) {
+ Routine<T>(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/xgemv.opencl"
#include "../../kernels/level2/xgemv_fast.opencl"
@@ -143,7 +134,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Retrieves the Xgemv kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index e2f7397a..e487d41b 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xger<half>::precision_ = Precision::kHalf;
-template <> const Precision Xger<float>::precision_ = Precision::kSingle;
-template <> const Precision Xger<double>::precision_ = Precision::kDouble;
-template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xger<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xger.opencl"
@@ -71,7 +62,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xger");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc
index 3ee3911a..08ff5a2e 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cc
@@ -18,19 +18,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xher<half, half>::precision_ = Precision::kHalf;
-template <> const Precision Xher<float, float>::precision_ = Precision::kSingle;
-template <> const Precision Xher<double, double>::precision_ = Precision::kDouble;
-template <> const Precision Xher<float2, float>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xher<double2, double>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher.opencl"
@@ -85,7 +76,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xher");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc
index 9edc1dd9..d5d4323b 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cc
@@ -18,19 +18,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xher2<half>::precision_ = Precision::kHalf;
-template <> const Precision Xher2<float>::precision_ = Precision::kSingle;
-template <> const Precision Xher2<double>::precision_ = Precision::kDouble;
-template <> const Precision Xher2<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xher2<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Xger"}, precision_) {
+ Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher2.opencl"
@@ -73,7 +64,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// Retrieves the kernel from the compiled binary
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, "Xher2");
// Sets the kernel arguments
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index a602e550..7d06c2a2 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xgemm<half>::precision_ = Precision::kHalf;
-template <> const Precision Xgemm<float>::precision_ = Precision::kSingle;
-template <> const Precision Xgemm<double>::precision_ = Precision::kDouble;
-template <> const Precision Xgemm<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -112,7 +103,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
try {
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == m_ceiled && a_two == k_ceiled && a_ld == m_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc
index 8b2c971d..8120c09c 100644
--- a/src/routines/level3/xhemm.cc
+++ b/src/routines/level3/xhemm.cc
@@ -61,7 +61,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm
// routine afterwards
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the hermitian-to-squared kernel
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index 8fc70abd..0e5178df 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -19,16 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xher2k<float2,float>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xher2k<double2,double>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -94,7 +88,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
try {
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Determines whether or not temporary matrices are needed
auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index af0e32ba..f8ec217a 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -19,16 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xherk<float2,float>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xherk<double2,double>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -91,7 +85,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc
index cbacbb71..c5e56617 100644
--- a/src/routines/level3/xsymm.cc
+++ b/src/routines/level3/xsymm.cc
@@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
// routine afterwards
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the symmetric-to-squared kernel
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 88bb5387..b517520c 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xsyr2k<half>::precision_ = Precision::kHalf;
-template <> const Precision Xsyr2k<float>::precision_ = Precision::kSingle;
-template <> const Precision Xsyr2k<double>::precision_ = Precision::kDouble;
-template <> const Precision Xsyr2k<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -93,7 +84,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Determines whether or not temporary matrices are needed
auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 88623ad4..ccf8710c 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xsyrk<half>::precision_ = Precision::kHalf;
-template <> const Precision Xsyrk<float>::precision_ = Precision::kSingle;
-template <> const Precision Xsyrk<double>::precision_ = Precision::kDouble;
-template <> const Precision Xsyrk<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -89,7 +80,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
try {
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc
index b756d187..92dda9fb 100644
--- a/src/routines/level3/xtrmm.cc
+++ b/src/routines/level3/xtrmm.cc
@@ -63,7 +63,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the triangular matrix to be able to run the regular Xgemm
// routine afterwards
try {
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the triangular-to-squared kernel
diff --git a/src/routines/levelx/xomatcopy.cc b/src/routines/levelx/xomatcopy.cc
index 80683b7a..c724b56b 100644
--- a/src/routines/levelx/xomatcopy.cc
+++ b/src/routines/levelx/xomatcopy.cc
@@ -19,19 +19,10 @@
namespace clblast {
// =================================================================================================
-// Specific implementations to get the memory-type based on a template argument
-template <> const Precision Xomatcopy<half>::precision_ = Precision::kHalf;
-template <> const Precision Xomatcopy<float>::precision_ = Precision::kSingle;
-template <> const Precision Xomatcopy<double>::precision_ = Precision::kDouble;
-template <> const Precision Xomatcopy<float2>::precision_ = Precision::kComplexSingle;
-template <> const Precision Xomatcopy<double2>::precision_ = Precision::kComplexDouble;
-
-// =================================================================================================
-
// Constructor: forwards to base class constructor
template <typename T>
Xomatcopy<T>::Xomatcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, precision_) {
+ Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -78,7 +69,7 @@ StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_trans
if (ErrorIn(status)) { return status; }
// Loads the program from the database
- const auto program = GetProgramFromCache(context_, precision_, routine_name_);
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto emptyEventList = std::vector<Event>();
status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, emptyEventList,
diff --git a/src/utilities.cc b/src/utilities.cc
index 851e6d9f..30b09a5f 100644
--- a/src/utilities.cc
+++ b/src/utilities.cc
@@ -360,6 +360,13 @@ size_t GetBytes(const Precision precision) {
}
}
+// Convert the template argument into a precision value
+template <> Precision PrecisionValue<half>() { return Precision::kHalf; }
+template <> Precision PrecisionValue<float>() { return Precision::kSingle; }
+template <> Precision PrecisionValue<double>() { return Precision::kDouble; }
+template <> Precision PrecisionValue<float2>() { return Precision::kComplexSingle; }
+template <> Precision PrecisionValue<double2>() { return Precision::kComplexDouble; }
+
// =================================================================================================
// Returns false if this precision is not supported by the device