summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-04-28 21:14:17 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-04-28 21:14:17 +0200
commit d9b21d7f4920b115d3fe7f2e3cce1f89eb762c10 (patch)
tree c9a44f189fce5f6fc2456604dfe4c9d3e951e4e0 /src
parent d7ddbdeb1f416f56bc469d16c051551207274703 (diff)
Fixed the cache to store binaries instead of OpenCL programs
Diffstat (limited to 'src')
-rw-r--r-- src/cache.cc 56
-rw-r--r-- src/clblast.cc 4
-rw-r--r-- src/clblast_c.cc 6
-rw-r--r-- src/routine.cc 5
-rw-r--r-- src/routines/level1/xamax.cc 2
-rw-r--r-- src/routines/level1/xasum.cc 2
-rw-r--r-- src/routines/level1/xaxpy.cc 2
-rw-r--r-- src/routines/level1/xcopy.cc 2
-rw-r--r-- src/routines/level1/xdot.cc 2
-rw-r--r-- src/routines/level1/xnrm2.cc 2
-rw-r--r-- src/routines/level1/xscal.cc 2
-rw-r--r-- src/routines/level1/xswap.cc 2
-rw-r--r-- src/routines/level2/xgemv.cc 2
-rw-r--r-- src/routines/level2/xger.cc 2
-rw-r--r-- src/routines/level2/xher.cc 2
-rw-r--r-- src/routines/level2/xher2.cc 2
-rw-r--r-- src/routines/level3/xgemm.cc 2
-rw-r--r-- src/routines/level3/xhemm.cc 2
-rw-r--r-- src/routines/level3/xher2k.cc 2
-rw-r--r-- src/routines/level3/xherk.cc 2
-rw-r--r-- src/routines/level3/xsymm.cc 2
-rw-r--r-- src/routines/level3/xsyr2k.cc 2
-rw-r--r-- src/routines/level3/xsyrk.cc 2
-rw-r--r-- src/routines/level3/xtrmm.cc 2
24 files changed, 56 insertions, 55 deletions
diff --git a/src/cache.cc b/src/cache.cc
index beeb1b35..18731a51 100644
--- a/src/cache.cc
+++ b/src/cache.cc
@@ -21,50 +21,50 @@ namespace clblast {
namespace cache {
// =================================================================================================
-// Stores the compiled program in the cache
-void StoreProgramToCache(const Program& program, const std::string &device_name,
- const Precision &precision, const std::string &routine_name) {
- program_cache_mutex_.lock();
- program_cache_.push_back({program, device_name, precision, routine_name});
- program_cache_mutex_.unlock();
+// Stores the compiled binary or IR in the cache
+void StoreBinaryToCache(const std::string& binary, const std::string &device_name,
+ const Precision &precision, const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ binary_cache_.push_back({binary, device_name, precision, routine_name});
+ binary_cache_mutex_.unlock();
}
-// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
+// Queries the cache and retrieves a matching binary. Assumes that the match is available, throws
// otherwise.
-const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name) {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name, precision, routine_name)) {
- program_cache_mutex_.unlock();
- return cached_program.program;
+const std::string& GetBinaryFromCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ for (auto &cached_binary: binary_cache_) {
+ if (cached_binary.MatchInCache(device_name, precision, routine_name)) {
+ binary_cache_mutex_.unlock();
+ return cached_binary.binary;
}
}
- program_cache_mutex_.unlock();
- throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none.");
+ binary_cache_mutex_.unlock();
+ throw std::runtime_error("Internal CLBlast error: Expected binary in cache, but found none.");
}
// Queries the cache to see whether or not the compiled kernel is already there
-bool ProgramIsInCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name) {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name, precision, routine_name)) {
- program_cache_mutex_.unlock();
+bool BinaryIsInCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ for (auto &cached_binary: binary_cache_) {
+ if (cached_binary.MatchInCache(device_name, precision, routine_name)) {
+ binary_cache_mutex_.unlock();
return true;
}
}
- program_cache_mutex_.unlock();
+ binary_cache_mutex_.unlock();
return false;
}
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode ClearCompiledProgramCache() {
- program_cache_mutex_.lock();
- program_cache_.clear();
- program_cache_mutex_.unlock();
+// Clears the cache of stored binaries
+StatusCode ClearCache() {
+ binary_cache_mutex_.lock();
+ binary_cache_.clear();
+ binary_cache_mutex_.unlock();
return StatusCode::kSuccess;
}
diff --git a/src/clblast.cc b/src/clblast.cc
index fac5a539..fe79d7c1 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -1854,8 +1854,8 @@ template StatusCode PUBLIC_API Trsm<double2>(const Layout, const Side, const Tri
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode ClearCompiledProgramCache() { return cache::ClearCompiledProgramCache(); }
+// Clears the cache of stored binaries
+StatusCode ClearCache() { return cache::ClearCache(); }
// =================================================================================================
} // namespace clblast
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 72d93c4b..172bce64 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -2343,9 +2343,9 @@ StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle tri
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode CLBlastClearCompiledProgramCache() {
- return static_cast<StatusCode>(clblast::ClearCompiledProgramCache());
+// Clears the cache of stored binaries
+StatusCode CLBlastClearCache() {
+ return static_cast<StatusCode>(clblast::ClearCache());
}
// =================================================================================================
diff --git a/src/routine.cc b/src/routine.cc
index e0a75e41..cd4d82fb 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -96,8 +96,9 @@ StatusCode Routine<T>::SetUp() {
}
if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
- // Store the compiled program in the cache (atomic for thread-safety)
- StoreProgramToCache(program);
+ // Store the compiled kernel in the cache
+ auto binary = program.GetIR();
+ StoreBinaryToCache(binary);
} catch (...) { return StatusCode::kBuildProgramFailure; }
}
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index ffdfa496..33bd72a6 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -55,7 +55,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n,
// Retrieves the Xamax kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xamax");
auto kernel2 = Kernel(program, "XamaxEpilogue");
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index 5799e25a..ea33d7e1 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -55,7 +55,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n,
// Retrieves the Xasum kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xasum");
auto kernel2 = Kernel(program, "XasumEpilogue");
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 37d23543..96809a57 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -64,7 +64,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// Retrieves the Xaxpy kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 04508383..d34482ce 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -64,7 +64,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
// Retrieves the Xcopy kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index 4813a004..b2513485 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -59,7 +59,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
// Retrieves the Xdot kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xdot");
auto kernel2 = Kernel(program, "XdotEpilogue");
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index ceabe586..86166a0c 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -55,7 +55,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
// Retrieves the Xnrm2 kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xnrm2");
auto kernel2 = Kernel(program, "Xnrm2Epilogue");
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index e83e73fd..b92e2cdf 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -60,7 +60,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
// Retrieves the Xscal kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index bc425f40..bfc4a739 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -64,7 +64,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
// Retrieves the Xswap kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 24e87db0..f8985038 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -136,7 +136,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index dda78232..686c7e60 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -66,7 +66,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xger");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc
index aba665b0..a7116213 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cc
@@ -79,7 +79,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc
index bcd6488f..3fd1a961 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cc
@@ -68,7 +68,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher2");
// Sets the kernel arguments
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 7557dcc3..aa081e81 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -107,7 +107,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == m_ceiled && a_two == k_ceiled && a_ld == m_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc
index c0a4306a..d2fbf36e 100644
--- a/src/routines/level3/xhemm.cc
+++ b/src/routines/level3/xhemm.cc
@@ -61,7 +61,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the hermitian-to-squared kernel
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index 4d5a4d35..2c2c815d 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -93,7 +93,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index 574debe4..414c4760 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -90,7 +90,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc
index 914a326a..a39026f1 100644
--- a/src/routines/level3/xsymm.cc
+++ b/src/routines/level3/xsymm.cc
@@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the symmetric-to-squared kernel
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 44d0024e..3206c669 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -91,7 +91,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 44ed8d35..741ad064 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -87,7 +87,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc
index 484cf040..9e3b27b4 100644
--- a/src/routines/level3/xtrmm.cc
+++ b/src/routines/level3/xtrmm.cc
@@ -63,7 +63,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the triangular matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the triangular-to-squared kernel