From 48e2e96f1ba44e9b12a2449390bbbbd5e02777a3 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sun, 19 Jul 2015 16:24:14 +0200 Subject: Kernel caching is now based on a routine's name --- src/routine.cc | 12 +++++++----- src/routines/level1/xaxpy.cc | 2 +- src/routines/level2/xgemv.cc | 2 +- src/routines/level3/xgemm.cc | 2 +- src/routines/level3/xher2k.cc | 2 +- src/routines/level3/xherk.cc | 2 +- src/routines/level3/xsyr2k.cc | 2 +- src/routines/level3/xsyrk.cc | 2 +- 8 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/routine.cc b/src/routine.cc index eb5c5034..aded1a31 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -34,8 +34,7 @@ Routine::Routine(CommandQueue &queue, Event &event, const std::string &name, max_work_item_dimensions_(device_.MaxWorkItemDimensions()), max_work_item_sizes_(device_.MaxWorkItemSizes()), max_work_group_size_(device_.MaxWorkGroupSize()), - db_(queue_, routines, precision_), - routines_(routines) { + db_(queue_, routines, precision_) { } // ================================================================================================= @@ -71,6 +70,9 @@ StatusCode Routine::SetUp() { auto defines = db_.GetDefines(); defines += "#define PRECISION "+ToString(static_cast(precision_))+"\n"; + // Adds the name of the routine as a define + defines += "#define ROUTINE_"+routine_name_+"\n"; + // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. if (device_.Vendor() == "AMD") { @@ -95,7 +97,7 @@ StatusCode Routine::SetUp() { if (status == CL_INVALID_BINARY) { return StatusCode::kInvalidBinary; } // Store the compiled program in the cache - program_cache_.push_back({program, device_name_, precision_, routines_}); + program_cache_.push_back({program, device_name_, precision_, routine_name_}); } catch (...) { return StatusCode::kBuildProgramFailure; } } @@ -328,7 +330,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr // otherwise. const Program& Routine::GetProgramFromCache() const { for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routines_)) { + if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return cached_program.program; } } @@ -338,7 +340,7 @@ const Program& Routine::GetProgramFromCache() const { // Queries the cache to see whether or not the compiled kernel is already there bool Routine::ProgramIsInCache() const { for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routines_)) { return true; } + if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return true; } } return false; } diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index ed680856..e6b320d9 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -30,7 +30,7 @@ template <> const Precision Xaxpy::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xaxpy::Xaxpy(CommandQueue &queue, Event &event): - Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) { + Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) { source_string_ = #include "../../kernels/xaxpy.opencl" ; diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index 22bbb7ea..a7052af8 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemv::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xgemv::Xgemv(CommandQueue &queue, Event &event): - Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) { + Routine(queue, event, "GEMV", {"Xgemv"}, precision_) { source_string_ = #include "../../kernels/xgemv.opencl" ; diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 13ffafbb..85524891 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -30,7 +30,7 @@ template <> const Precision Xgemm::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xgemm::Xgemm(CommandQueue &queue, Event &event): - Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index b4291c1e..fa42733f 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -28,7 +28,7 @@ template <> const Precision Xher2k::precision_ = Precision::kCom // Constructor: forwards to base class constructor template Xher2k::Xher2k(CommandQueue &queue, Event &event): - Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 4b16d8f7..ae350050 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -28,7 +28,7 @@ template <> const Precision Xherk::precision_ = Precision::kComp // Constructor: forwards to base class constructor template Xherk::Xherk(CommandQueue &queue, Event &event): - Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index 6483629c..7ab3430a 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyr2k::precision_ = Precision::kComplexDou // Constructor: forwards to base class constructor template Xsyr2k::Xsyr2k(CommandQueue &queue, Event &event): - Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index 5cc1cbec..c6feb5e6 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -30,7 +30,7 @@ template <> const Precision Xsyrk::precision_ = Precision::kComplexDoub // Constructor: forwards to base class constructor template Xsyrk::Xsyrk(CommandQueue &queue, Event &event): - Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { + Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) { source_string_ = #include "../../kernels/copy.opencl" #include "../../kernels/pad.opencl" -- cgit v1.2.3