diff options
-rw-r--r-- | include/internal/routine.h | 19 | ||||
-rw-r--r-- | src/routine.cc | 12 | ||||
-rw-r--r-- | src/routines/level1/xaxpy.cc | 2 | ||||
-rw-r--r-- | src/routines/level2/xgemv.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xgemm.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xherk.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cc | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cc | 2 |
9 files changed, 20 insertions, 25 deletions
diff --git a/include/internal/routine.h b/include/internal/routine.h index 7b605d48..911bda49 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -34,20 +34,14 @@ class Routine { Program program; std::string device_name; Precision precision; - std::vector<std::string> routines; + std::string routine_name_; // Finds out whether the properties match - bool MatchInCache(const std::string &ref_name, const Precision &ref_precision, - const std::vector<std::string> &ref_routines) { - auto ref_size = ref_routines.size(); - if (device_name == ref_name && precision == ref_precision && routines.size() == ref_size) { - auto found_match = true; - for (auto i=size_t{0}; i<ref_size; ++i) { - if (routines[i] != ref_routines[i]) { found_match = false; } - } - return found_match; - } - return false; + bool MatchInCache(const std::string &ref_device, const Precision &ref_precision, + const std::string &ref_routine) { + return (device_name == ref_device && + precision == ref_precision && + routine_name_ == ref_routine); } }; @@ -125,7 +119,6 @@ class Routine { // Connection to the database for all the device-specific parameters const Database db_; - const std::vector<std::string> routines_; }; // ================================================================================================= diff --git a/src/routine.cc b/src/routine.cc index eb5c5034..aded1a31 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -34,8 +34,7 @@ Routine::Routine(CommandQueue &queue, Event &event, const std::string &name, max_work_item_dimensions_(device_.MaxWorkItemDimensions()), max_work_item_sizes_(device_.MaxWorkItemSizes()), max_work_group_size_(device_.MaxWorkGroupSize()), - db_(queue_, routines, precision_), - routines_(routines) { + db_(queue_, routines, precision_) { } // ================================================================================================= @@ -71,6 +70,9 @@ StatusCode Routine::SetUp() { auto defines = db_.GetDefines(); defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; + // Adds the name of the routine as a define + defines += "#define ROUTINE_"+routine_name_+"\n"; + // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. if (device_.Vendor() == "AMD") { @@ -95,7 +97,7 @@ StatusCode Routine::SetUp() { if (status == CL_INVALID_BINARY) { return StatusCode::kInvalidBinary; } // Store the compiled program in the cache - program_cache_.push_back({program, device_name_, precision_, routines_}); + program_cache_.push_back({program, device_name_, precision_, routine_name_}); } catch (...) { return StatusCode::kBuildProgramFailure; } } @@ -328,7 +330,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr // otherwise. const Program& Routine::GetProgramFromCache() const { for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routines_)) { + if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return cached_program.program; } } @@ -338,7 +340,7 @@ const Program& Routine::GetProgramFromCache() const { // Queries the cache to see whether or not the compiled kernel is already there bool Routine::ProgramIsInCache() const { for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routines_)) { return true; } + if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return true; } } return false; } diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index ed680856..e6b320d9 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -30,7 +30,7 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event): - Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) { + Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/xaxpy.opencl" ; diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index 22bbb7ea..a7052af8 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(CommandQueue &queue, Event &event): - Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) { + Routine(queue, event, "GEMV", {"Xgemv"}, precision_) { source_string_ = #include "../../kernels/xgemv.opencl" ; diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 13ffafbb..85524891 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(CommandQueue &queue, Event &event): - Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index b4291c1e..fa42733f 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -28,7 +28,7 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom // Constructor: forwards to base class constructor template <typename T, typename U> Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event): - Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 4b16d8f7..ae350050 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -28,7 +28,7 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp // Constructor: forwards to base class constructor template <typename T, typename U> Xherk<T,U>::Xherk(CommandQueue &queue, Event &event): - Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 6483629c..7ab3430a 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template <typename T> Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event): - Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 5cc1cbec..c6feb5e6 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template <typename T> Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event): - Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" |