summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/internal/routine.h 19
-rw-r--r-- src/routine.cc 12
-rw-r--r-- src/routines/level1/xaxpy.cc 2
-rw-r--r-- src/routines/level2/xgemv.cc 2
-rw-r--r-- src/routines/level3/xgemm.cc 2
-rw-r--r-- src/routines/level3/xher2k.cc 2
-rw-r--r-- src/routines/level3/xherk.cc 2
-rw-r--r-- src/routines/level3/xsyr2k.cc 2
-rw-r--r-- src/routines/level3/xsyrk.cc 2
9 files changed, 20 insertions, 25 deletions
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 7b605d48..911bda49 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -34,20 +34,14 @@ class Routine {
Program program;
std::string device_name;
Precision precision;
- std::vector<std::string> routines;
+ std::string routine_name_;
// Finds out whether the properties match
- bool MatchInCache(const std::string &ref_name, const Precision &ref_precision,
- const std::vector<std::string> &ref_routines) {
- auto ref_size = ref_routines.size();
- if (device_name == ref_name && precision == ref_precision && routines.size() == ref_size) {
- auto found_match = true;
- for (auto i=size_t{0}; i<ref_size; ++i) {
- if (routines[i] != ref_routines[i]) { found_match = false; }
- }
- return found_match;
- }
- return false;
+ bool MatchInCache(const std::string &ref_device, const Precision &ref_precision,
+ const std::string &ref_routine) {
+ return (device_name == ref_device &&
+ precision == ref_precision &&
+ routine_name_ == ref_routine);
}
};
@@ -125,7 +119,6 @@ class Routine {
// Connection to the database for all the device-specific parameters
const Database db_;
- const std::vector<std::string> routines_;
};
// =================================================================================================
diff --git a/src/routine.cc b/src/routine.cc
index eb5c5034..aded1a31 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -34,8 +34,7 @@ Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
max_work_item_dimensions_(device_.MaxWorkItemDimensions()),
max_work_item_sizes_(device_.MaxWorkItemSizes()),
max_work_group_size_(device_.MaxWorkGroupSize()),
- db_(queue_, routines, precision_),
- routines_(routines) {
+ db_(queue_, routines, precision_) {
}
// =================================================================================================
@@ -71,6 +70,9 @@ StatusCode Routine::SetUp() {
auto defines = db_.GetDefines();
defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ // Adds the name of the routine as a define
+ defines += "#define ROUTINE_"+routine_name_+"\n";
+
// For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
// performance, but might result in a reduced accuracy.
if (device_.Vendor() == "AMD") {
@@ -95,7 +97,7 @@ StatusCode Routine::SetUp() {
if (status == CL_INVALID_BINARY) { return StatusCode::kInvalidBinary; }
// Store the compiled program in the cache
- program_cache_.push_back({program, device_name_, precision_, routines_});
+ program_cache_.push_back({program, device_name_, precision_, routine_name_});
} catch (...) { return StatusCode::kBuildProgramFailure; }
}
@@ -328,7 +330,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
// otherwise.
const Program& Routine::GetProgramFromCache() const {
for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routines_)) {
+ if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) {
return cached_program.program;
}
}
@@ -338,7 +340,7 @@ const Program& Routine::GetProgramFromCache() const {
// Queries the cache to see whether or not the compiled kernel is already there
bool Routine::ProgramIsInCache() const {
for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routines_)) { return true; }
+ if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return true; }
}
return false;
}
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index ed680856..e6b320d9 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -30,7 +30,7 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
+ Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/xaxpy.opencl"
;
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 22bbb7ea..a7052af8 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -30,7 +30,7 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
+ Routine(queue, event, "GEMV", {"Xgemv"}, precision_) {
source_string_ =
#include "../../kernels/xgemv.opencl"
;
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 13ffafbb..85524891 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -30,7 +30,7 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index b4291c1e..fa42733f 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -28,7 +28,7 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index 4b16d8f7..ae350050 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -28,7 +28,7 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 6483629c..7ab3430a 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -30,7 +30,7 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 5cc1cbec..c6feb5e6 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -30,7 +30,7 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"