summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-04-27 16:02:13 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-04-27 16:02:13 +0200
commit82be8f211cbd50d2d75fe78d8af4a1da04a0582b (patch)
tree19b4dbb4478edb28b7afd99a11e5a1e470098a5d
parent44bdb60e834ef015ee4cb25a6f0eba2a092291f0 (diff)
Moved all cache-related functions to a separate file; added a ClearCompiledProgramCache function to clear the cache
-rw-r--r--CHANGELOG2
-rw-r--r--CMakeLists.txt3
-rw-r--r--include/clblast.h7
-rw-r--r--include/clblast_c.h6
-rw-r--r--include/internal/cache.h72
-rw-r--r--include/internal/routine.h37
-rw-r--r--scripts/generator/generator.py4
-rw-r--r--src/cache.cc73
-rw-r--r--src/clblast.cc6
-rw-r--r--src/clblast_c.cc7
-rw-r--r--src/routine.cc40
11 files changed, 191 insertions, 66 deletions
diff --git a/CHANGELOG b/CHANGELOG
index c9770dc2..4c6a9be5 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,8 @@ Development version (next release)
- Made the library thread-safe
- Performance and correctness tests can now (on top of clBLAS) be performed against CPU BLAS libraries
- Fixed the use of events within the library
+- Changed the enum parameters to match the raw values of the cblas standard
+- Added a function to clear the cache of previously compiled programs
- Added level-1 routines:
* SNRM2/DNRM2/ScNRM2/DzNRM2
* SASUM/DASUM/ScASUM/DzASUM
diff --git a/CMakeLists.txt b/CMakeLists.txt
index efdf6be0..6abfc09f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -131,7 +131,8 @@ set(PRECISIONS 32 64 3232 6464)
# ==================================================================================================
# Gathers all source-files
-set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc src/clblast_c.cc)
+set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/cache.cc
+ src/utilities.cc src/clblast_c.cc)
foreach(ROUTINE ${LEVEL1_ROUTINES})
set(SOURCES ${SOURCES} src/routines/level1/${ROUTINE}.cc)
endforeach()
diff --git a/include/clblast.h b/include/clblast.h
index f73acb57..4a3ec9b6 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -539,6 +539,13 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
cl_command_queue* queue, cl_event* event = nullptr);
// =================================================================================================
+
+// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
+// for the same device. This cache can be cleared to free up system memory or in case of debugging.
+StatusCode ClearCompiledProgramCache();
+
+// =================================================================================================
+
} // namespace clblast
// CLBLAST_CLBLAST_H_
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 8c0a0792..1ca300ca 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1036,6 +1036,12 @@ StatusCode PUBLIC_API CLBlastZtrsm(const Layout layout, const Side side, const T
// =================================================================================================
+// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
+// for the same device. This cache can be cleared to free up system memory or in case of debugging.
+StatusCode PUBLIC_API CLBlastClearCompiledProgramCache();
+
+// =================================================================================================
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/include/internal/cache.h b/include/internal/cache.h
new file mode 100644
index 00000000..44fad68d
--- /dev/null
+++ b/include/internal/cache.h
@@ -0,0 +1,72 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the caching functionality of compiled binaries.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_CACHE_H_
+#define CLBLAST_CACHE_H_
+
+#include <string>
+#include <vector>
+#include <mutex>
+
+#include "internal/utilities.h"
+
+namespace clblast {
+namespace cache {
+// =================================================================================================
+
+// One cache entry: a compiled OpenCL program along with the meta-data used to look it up
+struct ProgramCache {
+  Program program;
+  std::string device_name;
+  Precision precision;
+  std::string routine_name_;
+
+  // True only when device, precision and routine all equal this entry's; const as it only compares
+  bool MatchInCache(const std::string &ref_device, const Precision &ref_precision,
+                    const std::string &ref_routine) const {
+    return (device_name == ref_device &&
+            precision == ref_precision &&
+            routine_name_ == ref_routine);
+  }
+};
+
+// The cache and its mutex. NOTE(review): 'static' here gives each including file its own copy
+static std::vector<ProgramCache> program_cache_;
+static std::mutex program_cache_mutex_;
+
+// =================================================================================================
+
+// Stores the compiled program in the cache
+void StoreProgramToCache(const Program& program, const std::string &device_name,
+ const Precision &precision, const std::string &routine_name);
+
+// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
+// otherwise.
+const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name);
+
+// Queries the cache to see whether or not the compiled kernel is already there
+bool ProgramIsInCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name);
+
+// =================================================================================================
+
+// Clears the cache of stored program binaries
+StatusCode ClearCompiledProgramCache();
+
+// =================================================================================================
+} // namespace cache
+} // namespace clblast
+
+// CLBLAST_CACHE_H_
+#endif
diff --git a/include/internal/routine.h b/include/internal/routine.h
index b2b6f622..013769d8 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -18,8 +18,8 @@
#include <string>
#include <vector>
-#include <mutex>
+#include "internal/cache.h"
#include "internal/utilities.h"
#include "internal/database.h"
@@ -31,26 +31,6 @@ template <typename T>
class Routine {
public:
- // The cache of compiled OpenCL programs, along with some meta-data
- struct ProgramCache {
- Program program;
- std::string device_name;
- Precision precision;
- std::string routine_name_;
-
- // Finds out whether the properties match
- bool MatchInCache(const std::string &ref_device, const Precision &ref_precision,
- const std::string &ref_routine) {
- return (device_name == ref_device &&
- precision == ref_precision &&
- routine_name_ == ref_routine);
- }
- };
-
- // The actual cache, implemented as a vector of the above data-type, and its mutex
- static std::vector<ProgramCache> program_cache_;
- static std::mutex program_cache_mutex_;
-
// Helper functions which check for errors in the status code
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
@@ -103,12 +83,21 @@ class Routine {
const bool do_transpose, const bool do_conjugate,
const bool upper = false, const bool lower = false,
const bool diagonal_imag_zero = false);
-
+
+  // Hands a newly compiled program to the cache, keyed on this routine's device, precision and name
+  void StoreProgramToCache(const Program& program) const {
+    cache::StoreProgramToCache(program, device_name_, precision_, routine_name_);
+  }
+
// Queries the cache and retrieves either a matching program or a boolean whether a match exists.
// The first assumes that the program is available in the cache and will throw an exception
// otherwise.
- const Program& GetProgramFromCache() const;
- bool ProgramIsInCache() const;
+ const Program& GetProgramFromCache() const {
+ return cache::GetProgramFromCache(device_name_, precision_, routine_name_);
+ }
+ bool ProgramIsInCache() const {
+ return cache::ProgramIsInCache(device_name_, precision_, routine_name_);
+ }
// Non-static variable for the precision. Note that the same variable (but static) might exist in
// a derived class.
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index 9a520fac..9de03567 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -294,8 +294,8 @@ files = [
path_clblast+"/test/wrapper_clblas.h",
path_clblast+"/test/wrapper_cblas.h",
]
-header_lines = [84, 67, 93, 22, 29, 38]
-footer_lines = [6, 3, 9, 2, 6, 6]
+header_lines = [84, 68, 93, 22, 29, 38]
+footer_lines = [13, 8, 15, 9, 6, 6]
# Checks whether the command-line arguments are valid; exits otherwise
for f in files:
diff --git a/src/cache.cc b/src/cache.cc
new file mode 100644
index 00000000..beeb1b35
--- /dev/null
+++ b/src/cache.cc
@@ -0,0 +1,73 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the caching functionality of compiled binaries.
+//
+// =================================================================================================
+
+#include <string>
+#include <vector>
+#include <mutex>
+
+#include "internal/cache.h"
+
+namespace clblast {
+namespace cache {
+// =================================================================================================
+
+// Stores the compiled program in the cache, together with the meta-data it is later matched on
+void StoreProgramToCache(const Program& program, const std::string &device_name,
+                         const Precision &precision, const std::string &routine_name) {
+  // Scoped lock: the mutex is released on every exit path, including when push_back throws
+  std::lock_guard<std::mutex> lock(program_cache_mutex_);
+  program_cache_.push_back({program, device_name, precision, routine_name});
+}
+
+// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
+// std::runtime_error otherwise. The returned reference points into the cache's own storage.
+const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision,
+                                   const std::string &routine_name) {
+  // Scoped lock: released automatically on both the return and the throw path
+  std::lock_guard<std::mutex> lock(program_cache_mutex_);
+  for (auto &cached_program: program_cache_) {
+    if (cached_program.MatchInCache(device_name, precision, routine_name)) {
+      // NOTE(review): a later store or clear of the cache may invalidate this reference
+      return cached_program.program;
+    }
+  }
+  throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none.");
+}
+
+// Queries the cache to see whether or not the compiled kernel is already there
+bool ProgramIsInCache(const std::string &device_name, const Precision &precision,
+                      const std::string &routine_name) {
+  // Scoped lock: no manual unlock is needed on either return path
+  std::lock_guard<std::mutex> lock(program_cache_mutex_);
+  for (auto &cached_program: program_cache_) {
+    if (cached_program.MatchInCache(device_name, precision, routine_name)) {
+      return true;
+    }
+  }
+  // Every entry was inspected without finding a match
+  return false;
+}
+
+// =================================================================================================
+
+// Clears the cache of stored program binaries; always reports success
+StatusCode ClearCompiledProgramCache() {
+  // Scoped lock: the mutex is held for the whole clear and released automatically afterwards
+  std::lock_guard<std::mutex> lock(program_cache_mutex_);
+  program_cache_.clear();
+  return StatusCode::kSuccess;
+}
+
+// =================================================================================================
+} // namespace cache
+} // namespace clblast
diff --git a/src/clblast.cc b/src/clblast.cc
index 145b6bf6..b6efd185 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -17,6 +17,7 @@
#include "clblast.h"
#include "internal/public_api.h"
+#include "internal/cache.h"
// BLAS level-1 includes
#include "internal/routines/level1/xswap.h"
@@ -1788,4 +1789,9 @@ template StatusCode PUBLIC_API Trsm<double2>(const Layout, const Side, const Tri
cl_command_queue*, cl_event*);
// =================================================================================================
+
+// Public C++ API entry point: forwards to the internal cache::ClearCompiledProgramCache()
+StatusCode ClearCompiledProgramCache() { return cache::ClearCompiledProgramCache(); }
+
+// =================================================================================================
} // namespace clblast
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 23c96feb..6e238b77 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -2258,3 +2258,10 @@ StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle tri
}
// =================================================================================================
+
+// C API wrapper: calls clblast::ClearCompiledProgramCache() and casts its result to StatusCode
+StatusCode CLBlastClearCompiledProgramCache() {
+ return static_cast<StatusCode>(clblast::ClearCompiledProgramCache());
+}
+
+// =================================================================================================
diff --git a/src/routine.cc b/src/routine.cc
index b5ba63eb..e0a75e41 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -13,17 +13,12 @@
#include <string>
#include <vector>
-#include <mutex>
#include "internal/routine.h"
namespace clblast {
// =================================================================================================
-// The cache of compiled OpenCL programs and its mutex for thread safety
-template <typename T> std::vector<typename Routine<T>::ProgramCache> Routine<T>::program_cache_;
-template <typename T> std::mutex Routine<T>::program_cache_mutex_;
-
// Constructor: not much here, because no status codes can be returned
template <typename T>
Routine<T>::Routine(Queue &queue, EventPointer event, const std::string &name,
@@ -102,9 +97,7 @@ StatusCode Routine<T>::SetUp() {
if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
// Store the compiled program in the cache (atomic for thread-safety)
- program_cache_mutex_.lock();
- program_cache_.push_back({program, device_name_, precision_, routine_name_});
- program_cache_mutex_.unlock();
+ StoreProgramToCache(program);
} catch (...) { return StatusCode::kBuildProgramFailure; }
}
@@ -374,37 +367,6 @@ StatusCode Routine<T>::PadCopyTransposeMatrix(EventPointer event, std::vector<Ev
// =================================================================================================
-// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
-// otherwise.
-template <typename T>
-const Program& Routine<T>::GetProgramFromCache() const {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) {
- program_cache_mutex_.unlock();
- return cached_program.program;
- }
- }
- program_cache_mutex_.unlock();
- throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none.");
-}
-
-// Queries the cache to see whether or not the compiled kernel is already there
-template <typename T>
-bool Routine<T>::ProgramIsInCache() const {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) {
- program_cache_mutex_.unlock();
- return true;
- }
- }
- program_cache_mutex_.unlock();
- return false;
-}
-
-// =================================================================================================
-
// Compiles the templated class
template class Routine<float>;
template class Routine<double>;