diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/routines/level1/xamax.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xasum.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xaxpy.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xcopy.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xdot.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xnrm2.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xscal.cc | 13 | ||||
-rw-r--r-- | src/routines/level1/xswap.cc | 13 | ||||
-rw-r--r-- | src/routines/level2/xgemv.cc | 13 | ||||
-rw-r--r-- | src/routines/level2/xger.cc | 13 | ||||
-rw-r--r-- | src/routines/level2/xher.cc | 13 | ||||
-rw-r--r-- | src/routines/level2/xher2.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xgemm.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xhemm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xsymm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xtrmm.cc | 2 | ||||
-rw-r--r-- | src/routines/levelx/xomatcopy.cc | 13 | ||||
-rw-r--r-- | src/utilities.cc | 7 |
22 files changed, 46 insertions, 195 deletions
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc index 9e203d03..1a3441ef 100644 --- a/src/routines/level1/xamax.cc +++ b/src/routines/level1/xamax.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xamax<half>::precision_ = Precision::kHalf; -template <> const Precision Xamax<float>::precision_ = Precision::kSingle; -template <> const Precision Xamax<double>::precision_ = Precision::kDouble; -template <> const Precision Xamax<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xamax<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xdot"}, precision_) { + Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/xamax.opencl" ; @@ -56,7 +47,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n, // Retrieves the Xamax kernels from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel1 = Kernel(program, "Xamax"); auto kernel2 = Kernel(program, "XamaxEpilogue"); diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc index f4d898be..85c6e1ed 100644 --- a/src/routines/level1/xasum.cc +++ b/src/routines/level1/xasum.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xasum<half>::precision_ = Precision::kHalf; -template <> const Precision Xasum<float>::precision_ = Precision::kSingle; -template <> const Precision Xasum<double>::precision_ = Precision::kDouble; -template <> const Precision Xasum<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xasum<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xasum<T>::Xasum(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xdot"}, precision_) { + Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/xasum.opencl" ; @@ -56,7 +47,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n, // Retrieves the Xasum kernels from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel1 = Kernel(program, "Xasum"); auto kernel2 = Kernel(program, "XasumEpilogue"); diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index 221e1195..39121067 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xaxpy<half>::precision_ = Precision::kHalf; -template <> const Precision Xaxpy<float>::precision_ = Precision::kSingle; -template <> const Precision Xaxpy<double>::precision_ = Precision::kDouble; -template <> const Precision Xaxpy<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { + Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xaxpy.opencl" @@ -65,7 +56,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, // Retrieves the Xaxpy kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Upload the scalar argument as a constant buffer to the device (needed for half-precision) diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc index 647a681a..d85efca3 100644 --- a/src/routines/level1/xcopy.cc +++ b/src/routines/level1/xcopy.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xcopy<half>::precision_ = Precision::kHalf; -template <> const Precision Xcopy<float>::precision_ = Precision::kSingle; -template <> const Precision Xcopy<double>::precision_ = Precision::kDouble; -template <> const Precision Xcopy<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xcopy<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { + Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xcopy.opencl" @@ -65,7 +56,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n, // Retrieves the Xcopy kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc index eac64d13..e3a6b1f0 100644 --- a/src/routines/level1/xdot.cc +++ b/src/routines/level1/xdot.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xdot<half>::precision_ = Precision::kHalf; -template <> const Precision Xdot<float>::precision_ = Precision::kSingle; -template <> const Precision Xdot<double>::precision_ = Precision::kDouble; -template <> const Precision Xdot<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xdot<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xdot"}, precision_) { + Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/xdot.opencl" ; @@ -60,7 +51,7 @@ StatusCode Xdot<T>::DoDot(const size_t n, // Retrieves the Xdot kernels from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel1 = Kernel(program, "Xdot"); auto kernel2 = Kernel(program, "XdotEpilogue"); diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc index 23055aac..1730e144 100644 --- a/src/routines/level1/xnrm2.cc +++ b/src/routines/level1/xnrm2.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xnrm2<half>::precision_ = Precision::kHalf; -template <> const Precision Xnrm2<float>::precision_ = Precision::kSingle; -template <> const Precision Xnrm2<double>::precision_ = Precision::kDouble; -template <> const Precision Xnrm2<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xnrm2<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xdot"}, precision_) { + Routine<T>(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/xnrm2.opencl" ; @@ -56,7 +47,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n, // Retrieves the Xnrm2 kernels from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel1 = Kernel(program, "Xnrm2"); auto kernel2 = Kernel(program, "Xnrm2Epilogue"); diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc index 22d2cb5b..4792d40b 100644 --- a/src/routines/level1/xscal.cc +++ b/src/routines/level1/xscal.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xscal<half>::precision_ = Precision::kHalf; -template <> const Precision Xscal<float>::precision_ = Precision::kSingle; -template <> const Precision Xscal<double>::precision_ = Precision::kDouble; -template <> const Precision Xscal<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xscal<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { + Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xscal.opencl" @@ -61,7 +52,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha, // Retrieves the Xscal kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc index b6996932..897515fb 100644 --- a/src/routines/level1/xswap.cc +++ b/src/routines/level1/xswap.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xswap<half>::precision_ = Precision::kHalf; -template <> const Precision Xswap<float>::precision_ = Precision::kSingle; -template <> const Precision Xswap<double>::precision_ = Precision::kDouble; -template <> const Precision Xswap<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xswap<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xaxpy"}, precision_) { + Routine<T>(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level1/level1.opencl" #include "../../kernels/level1/xswap.opencl" @@ -65,7 +56,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n, // Retrieves the Xswap kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index b997673b..ea3b245d 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xgemv<half>::precision_ = Precision::kHalf; -template <> const Precision Xgemv<float>::precision_ = Precision::kSingle; -template <> const Precision Xgemv<double>::precision_ = Precision::kDouble; -template <> const Precision Xgemv<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Pad", "Xgemv"}, precision_) { + Routine<T>(queue, event, name, {"Pad", "Xgemv"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" @@ -143,7 +134,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // Retrieves the Xgemv kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index e2f7397a..e487d41b 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xger<half>::precision_ = Precision::kHalf; -template <> const Precision Xger<float>::precision_ = Precision::kSingle; -template <> const Precision Xger<double>::precision_ = Precision::kDouble; -template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xger<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xger.opencl" @@ -71,7 +62,7 @@ StatusCode Xger<T>::DoGer(const Layout layout, // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xger"); // Sets the kernel arguments diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 3ee3911a..08ff5a2e 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -18,19 +18,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher<half, half>::precision_ = Precision::kHalf; -template <> const Precision Xher<float, float>::precision_ = Precision::kSingle; -template <> const Precision Xher<double, double>::precision_ = Precision::kDouble; -template <> const Precision Xher<float2, float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher<double2, double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher.opencl" @@ -85,7 +76,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xher"); // Sets the kernel arguments diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index 9edc1dd9..d5d4323b 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -18,19 +18,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher2<half>::precision_ = Precision::kHalf; -template <> const Precision Xher2<float>::precision_ = Precision::kSingle; -template <> const Precision Xher2<double>::precision_ = Precision::kDouble; -template <> const Precision Xher2<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher2<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Xger"}, precision_) { + Routine<T>(queue, event, name, {"Xger"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level2/level2.opencl" #include "../../kernels/level2/xher2.opencl" @@ -73,7 +64,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, // Retrieves the kernel from the compiled binary try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, "Xher2"); // Sets the kernel arguments diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index a602e550..7d06c2a2 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xgemm<half>::precision_ = Precision::kHalf; -template <> const Precision Xgemm<float>::precision_ = Precision::kSingle; -template <> const Precision Xgemm<double>::precision_ = Precision::kDouble; -template <> const Precision Xgemm<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -112,7 +103,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == m_ceiled && a_two == k_ceiled && a_ld == m_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index 8b2c971d..8120c09c 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -61,7 +61,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the hermitian-to-squared kernel diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 8fc70abd..0e5178df 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -19,16 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher2k<float2,float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher2k<double2,double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -94,7 +88,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index af0e32ba..f8ec217a 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -19,16 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xherk<float2,float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xherk<double2,double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -91,7 +85,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index cbacbb71..c5e56617 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the symmetric-to-squared kernel diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 88bb5387..b517520c 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xsyr2k<half>::precision_ = Precision::kHalf; -template <> const Precision Xsyr2k<float>::precision_ = Precision::kSingle; -template <> const Precision Xsyr2k<double>::precision_ = Precision::kDouble; -template <> const Precision Xsyr2k<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -93,7 +84,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 88623ad4..ccf8710c 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xsyrk<half>::precision_ = Precision::kHalf; -template <> const Precision Xsyrk<float>::precision_ = Precision::kSingle; -template <> const Precision Xsyrk<double>::precision_ = Precision::kDouble; -template <> const Precision Xsyrk<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -89,7 +80,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index b756d187..92dda9fb 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -63,7 +63,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the triangular-to-squared kernel diff --git a/src/routines/levelx/xomatcopy.cc b/src/routines/levelx/xomatcopy.cc index 80683b7a..c724b56b 100644 --- a/src/routines/levelx/xomatcopy.cc +++ b/src/routines/levelx/xomatcopy.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xomatcopy<half>::precision_ = Precision::kHalf; -template <> const Precision Xomatcopy<float>::precision_ = Precision::kSingle; -template <> const Precision Xomatcopy<double>::precision_ = Precision::kDouble; -template <> const Precision Xomatcopy<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xomatcopy<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xomatcopy<T>::Xomatcopy(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -78,7 +69,7 @@ StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_trans if (ErrorIn(status)) { return status; } // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto emptyEventList = std::vector<Event>(); status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, emptyEventList, diff --git a/src/utilities.cc b/src/utilities.cc index 851e6d9f..30b09a5f 100644 --- a/src/utilities.cc +++ b/src/utilities.cc @@ -360,6 +360,13 @@ size_t GetBytes(const Precision precision) { } } +// Convert the template argument into a precision value +template <> Precision PrecisionValue<half>() { return Precision::kHalf; } +template <> Precision PrecisionValue<float>() { return Precision::kSingle; } +template <> Precision PrecisionValue<double>() { return Precision::kDouble; } +template <> Precision PrecisionValue<float2>() { return Precision::kComplexSingle; } +template <> Precision PrecisionValue<double2>() { return Precision::kComplexDouble; } + // ================================================================================================= // Returns false is this precision is not supported by the device |