summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: CNugteren <web@cedricnugteren.nl> 2015-07-19 16:24:14 +0200
committer: CNugteren <web@cedricnugteren.nl> 2015-07-19 16:24:14 +0200
commit: 48e2e96f1ba44e9b12a2449390bbbbd5e02777a3 (patch)
tree: 04d33841f31f6ef8bbf38c9cce15e9bbdbde01cc /src
parent: 4e499a67c1eb9f7cc4a79e75046aa0315b5df87d (diff)
Kernel caching is now based on a routine's name
Diffstat (limited to 'src')
-rw-r--r-- src/routine.cc 12
-rw-r--r-- src/routines/level1/xaxpy.cc 2
-rw-r--r-- src/routines/level2/xgemv.cc 2
-rw-r--r-- src/routines/level3/xgemm.cc 2
-rw-r--r-- src/routines/level3/xher2k.cc 2
-rw-r--r-- src/routines/level3/xherk.cc 2
-rw-r--r-- src/routines/level3/xsyr2k.cc 2
-rw-r--r-- src/routines/level3/xsyrk.cc 2
8 files changed, 14 insertions, 12 deletions
diff --git a/src/routine.cc b/src/routine.cc
index eb5c5034..aded1a31 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -34,8 +34,7 @@ Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
max_work_item_dimensions_(device_.MaxWorkItemDimensions()),
max_work_item_sizes_(device_.MaxWorkItemSizes()),
max_work_group_size_(device_.MaxWorkGroupSize()),
- db_(queue_, routines, precision_),
- routines_(routines) {
+ db_(queue_, routines, precision_) {
}
// =================================================================================================
@@ -71,6 +70,9 @@ StatusCode Routine::SetUp() {
auto defines = db_.GetDefines();
defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ // Adds the name of the routine as a define
+ defines += "#define ROUTINE_"+routine_name_+"\n";
+
// For specific devices, use the non-IEEE 754 compliant OpenCL mad() instruction. This can improve
// performance, but might result in a reduced accuracy.
if (device_.Vendor() == "AMD") {
@@ -95,7 +97,7 @@ StatusCode Routine::SetUp() {
if (status == CL_INVALID_BINARY) { return StatusCode::kInvalidBinary; }
// Store the compiled program in the cache
- program_cache_.push_back({program, device_name_, precision_, routines_});
+ program_cache_.push_back({program, device_name_, precision_, routine_name_});
} catch (...) { return StatusCode::kBuildProgramFailure; }
}
@@ -328,7 +330,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
// otherwise.
const Program& Routine::GetProgramFromCache() const {
for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routines_)) {
+ if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) {
return cached_program.program;
}
}
@@ -338,7 +340,7 @@ const Program& Routine::GetProgramFromCache() const {
// Queries the cache to see whether or not the compiled kernel is already there
bool Routine::ProgramIsInCache() const {
for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routines_)) { return true; }
+ if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { return true; }
}
return false;
}
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index ed680856..e6b320d9 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -30,7 +30,7 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
+ Routine(queue, event, "AXPY", {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/xaxpy.opencl"
;
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 22bbb7ea..a7052af8 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -30,7 +30,7 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
+ Routine(queue, event, "GEMV", {"Xgemv"}, precision_) {
source_string_ =
#include "../../kernels/xgemv.opencl"
;
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 13ffafbb..85524891 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -30,7 +30,7 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "GEMM", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index b4291c1e..fa42733f 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -28,7 +28,7 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "HER2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index 4b16d8f7..ae350050 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -28,7 +28,7 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "HERK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 6483629c..7ab3430a 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -30,7 +30,7 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "SYR2K", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 5cc1cbec..c6feb5e6 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -30,7 +30,7 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
- Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
+ Routine(queue, event, "SYRK", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"