diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-06-17 14:30:37 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-06-17 14:30:37 +0200 |
commit | f9947b4d7ffedcf98cdb128de835422f647e7f15 (patch) | |
tree | c42d427a23de5d14b3407943d529d18cd9e516f0 /src/routines/level3 | |
parent | 536b7fe4bce4b183cb060a1b9045752ae39d842f (diff) |
Removed the precision argument from the routines in favor of a single templated function
Diffstat (limited to 'src/routines/level3')
-rw-r--r-- | src/routines/level3/xgemm.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xhemm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 10 | ||||
-rw-r--r-- | src/routines/level3/xsymm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 13 | ||||
-rw-r--r-- | src/routines/level3/xtrmm.cc | 2 |
8 files changed, 13 insertions, 52 deletions
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index a602e550..7d06c2a2 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xgemm<half>::precision_ = Precision::kHalf; -template <> const Precision Xgemm<float>::precision_ = Precision::kSingle; -template <> const Precision Xgemm<double>::precision_ = Precision::kDouble; -template <> const Precision Xgemm<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -112,7 +103,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == m_ceiled && a_two == k_ceiled && a_ld == m_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index 8b2c971d..8120c09c 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -61,7 +61,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the hermitian-to-squared kernel diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 8fc70abd..0e5178df 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -19,16 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xher2k<float2,float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xher2k<double2,double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -94,7 +88,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index af0e32ba..f8ec217a 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -19,16 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xherk<float2,float>::precision_ = Precision::kComplexSingle; -template <> const Precision Xherk<double2,double>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -91,7 +85,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index cbacbb71..c5e56617 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the symmetric-to-squared kernel diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 88bb5387..b517520c 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xsyr2k<half>::precision_ = Precision::kHalf; -template <> const Precision Xsyr2k<float>::precision_ = Precision::kSingle; -template <> const Precision Xsyr2k<double>::precision_ = Precision::kDouble; -template <> const Precision Xsyr2k<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -93,7 +84,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 88623ad4..ccf8710c 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -19,19 +19,10 @@ namespace clblast { // ================================================================================================= -// Specific implementations to get the memory-type based on a template argument -template <> const Precision Xsyrk<half>::precision_ = Precision::kHalf; -template <> const Precision Xsyrk<float>::precision_ = Precision::kSingle; -template <> const Precision Xsyrk<double>::precision_ = Precision::kDouble; -template <> const Precision Xsyrk<float2>::precision_ = Precision::kComplexSingle; -template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDouble; - -// ================================================================================================= - // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name): - Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, precision_) { + Routine<T>(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -89,7 +80,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const try { // Loads the program from the database - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); // Determines whether or not temporary matrices are needed auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 && diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index b756d187..92dda9fb 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -63,7 +63,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm // routine afterwards try { - const auto program = GetProgramFromCache(context_, precision_, routine_name_); + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the triangular-to-squared kernel |