diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-04-27 16:02:13 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-04-27 16:02:13 +0200 |
commit | 82be8f211cbd50d2d75fe78d8af4a1da04a0582b (patch) | |
tree | 19b4dbb4478edb28b7afd99a11e5a1e470098a5d | |
parent | 44bdb60e834ef015ee4cb25a6f0eba2a092291f0 (diff) |
Moved all cache-related functions to a separate file; added a ClearCompiledProgramCache function to clear the cache
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | include/clblast.h | 7 | ||||
-rw-r--r-- | include/clblast_c.h | 6 | ||||
-rw-r--r-- | include/internal/cache.h | 72 | ||||
-rw-r--r-- | include/internal/routine.h | 37 | ||||
-rw-r--r-- | scripts/generator/generator.py | 4 | ||||
-rw-r--r-- | src/cache.cc | 73 | ||||
-rw-r--r-- | src/clblast.cc | 6 | ||||
-rw-r--r-- | src/clblast_c.cc | 7 | ||||
-rw-r--r-- | src/routine.cc | 40 |
11 files changed, 191 insertions, 66 deletions
@@ -4,6 +4,8 @@ Development version (next release) - Made the library thread-safe - Performance and correctness tests can now (on top of clBLAS) be performed against CPU BLAS libraries - Fixed the use of events within the library +- Changed the enum parameters to match the raw values of the cblas standard +- Added a function to clear the cache of previously compiled programs - Added level-1 routines: * SNRM2/DNRM2/ScNRM2/DzNRM2 * SASUM/DASUM/ScASUM/DzASUM diff --git a/CMakeLists.txt b/CMakeLists.txt index efdf6be0..6abfc09f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,7 +131,8 @@ set(PRECISIONS 32 64 3232 6464) # ================================================================================================== # Gathers all source-files -set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc src/clblast_c.cc) +set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/cache.cc + src/utilities.cc src/clblast_c.cc) foreach(ROUTINE ${LEVEL1_ROUTINES}) set(SOURCES ${SOURCES} src/routines/level1/${ROUTINE}.cc) endforeach() diff --git a/include/clblast.h b/include/clblast.h index f73acb57..4a3ec9b6 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -539,6 +539,13 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c cl_command_queue* queue, cl_event* event = nullptr); // ================================================================================================= + +// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on +// for the same device. This cache can be cleared to free up system memory or in case of debugging. +StatusCode ClearCompiledProgramCache(); + +// ================================================================================================= + } // namespace clblast // CLBLAST_CLBLAST_H_ diff --git a/include/clblast_c.h b/include/clblast_c.h index 8c0a0792..1ca300ca 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -1036,6 +1036,12 @@ StatusCode PUBLIC_API CLBlastZtrsm(const Layout layout, const Side side, const T // ================================================================================================= +// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on +// for the same device. This cache can be cleared to free up system memory or in case of debugging. +StatusCode PUBLIC_API CLBlastClearCompiledProgramCache(); + +// ================================================================================================= + #ifdef __cplusplus } // extern "C" #endif diff --git a/include/internal/cache.h b/include/internal/cache.h new file mode 100644 index 00000000..44fad68d --- /dev/null +++ b/include/internal/cache.h @@ -0,0 +1,72 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the caching functionality of compiled binaries. +// +// ================================================================================================= + +#ifndef CLBLAST_CACHE_H_ +#define CLBLAST_CACHE_H_ + +#include <string> +#include <vector> +#include <mutex> + +#include "internal/utilities.h" + +namespace clblast { +namespace cache { +// ================================================================================================= + +// The cache of compiled OpenCL programs, along with some meta-data +struct ProgramCache { + Program program; + std::string device_name; + Precision precision; + std::string routine_name_; + + // Finds out whether the properties match + bool MatchInCache(const std::string &ref_device, const Precision &ref_precision, + const std::string &ref_routine) { + return (device_name == ref_device && + precision == ref_precision && + routine_name_ == ref_routine); + } +}; + +// The actual cache, implemented as a vector of the above data-type, and its mutex +static std::vector<ProgramCache> program_cache_; +static std::mutex program_cache_mutex_; + +// ================================================================================================= + +// Stores the compiled program in the cache +void StoreProgramToCache(const Program& program, const std::string &device_name, + const Precision &precision, const std::string &routine_name); + +// Queries the cache and retrieves a matching program. Assumes that the match is available, throws +// otherwise. +const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision, + const std::string &routine_name); + +// Queries the cache to see whether or not the compiled kernel is already there +bool ProgramIsInCache(const std::string &device_name, const Precision &precision, + const std::string &routine_name); + +// ================================================================================================= + +// Clears the cache of stored program binaries +StatusCode ClearCompiledProgramCache(); + +// ================================================================================================= +} // namespace cache +} // namespace clblast + +// CLBLAST_CACHE_H_ +#endif diff --git a/include/internal/routine.h b/include/internal/routine.h index b2b6f622..013769d8 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -18,8 +18,8 @@ #include <string> #include <vector> -#include <mutex> +#include "internal/cache.h" #include "internal/utilities.h" #include "internal/database.h" @@ -31,26 +31,6 @@ template <typename T> class Routine { public: - // The cache of compiled OpenCL programs, along with some meta-data - struct ProgramCache { - Program program; - std::string device_name; - Precision precision; - std::string routine_name_; - - // Finds out whether the properties match - bool MatchInCache(const std::string &ref_device, const Precision &ref_precision, - const std::string &ref_routine) { - return (device_name == ref_device && - precision == ref_precision && - routine_name_ == ref_routine); - } - }; - - // The actual cache, implemented as a vector of the above data-type, and its mutex - static std::vector<ProgramCache> program_cache_; - static std::mutex program_cache_mutex_; - // Helper functions which check for errors in the status code static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); } @@ -103,12 +83,21 @@ class Routine { const bool do_transpose, const bool do_conjugate, const bool upper = false, const bool lower = false, const bool diagonal_imag_zero = false); - + + // Stores a newly compiled program into the cache + void StoreProgramToCache(const Program& program) const { + return cache::StoreProgramToCache(program, device_name_, precision_, routine_name_); + } + // Queries the cache and retrieve either a matching program or a boolean whether a match exists. // The first assumes that the program is available in the cache and will throw an exception // otherwise. - const Program& GetProgramFromCache() const; - bool ProgramIsInCache() const; + const Program& GetProgramFromCache() const { + return cache::GetProgramFromCache(device_name_, precision_, routine_name_); + } + bool ProgramIsInCache() const { + return cache::ProgramIsInCache(device_name_, precision_, routine_name_); + } // Non-static variable for the precision. Note that the same variable (but static) might exist in // a derived class. diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 9a520fac..9de03567 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -294,8 +294,8 @@ files = [ path_clblast+"/test/wrapper_clblas.h", path_clblast+"/test/wrapper_cblas.h", ] -header_lines = [84, 67, 93, 22, 29, 38] -footer_lines = [6, 3, 9, 2, 6, 6] +header_lines = [84, 68, 93, 22, 29, 38] +footer_lines = [13, 8, 15, 9, 6, 6] # Checks whether the command-line arguments are valid; exists otherwise for f in files: diff --git a/src/cache.cc b/src/cache.cc new file mode 100644 index 00000000..beeb1b35 --- /dev/null +++ b/src/cache.cc @@ -0,0 +1,73 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the caching functionality of compiled binaries. +// +// ================================================================================================= + +#include <string> +#include <vector> +#include <mutex> + +#include "internal/cache.h" + +namespace clblast { +namespace cache { +// ================================================================================================= + +// Stores the compiled program in the cache +void StoreProgramToCache(const Program& program, const std::string &device_name, + const Precision &precision, const std::string &routine_name) { + program_cache_mutex_.lock(); + program_cache_.push_back({program, device_name, precision, routine_name}); + program_cache_mutex_.unlock(); +} + +// Queries the cache and retrieves a matching program. Assumes that the match is available, throws +// otherwise. +const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision, + const std::string &routine_name) { + program_cache_mutex_.lock(); + for (auto &cached_program: program_cache_) { + if (cached_program.MatchInCache(device_name, precision, routine_name)) { + program_cache_mutex_.unlock(); + return cached_program.program; + } + } + program_cache_mutex_.unlock(); + throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none."); +} + +// Queries the cache to see whether or not the compiled kernel is already there +bool ProgramIsInCache(const std::string &device_name, const Precision &precision, + const std::string &routine_name) { + program_cache_mutex_.lock(); + for (auto &cached_program: program_cache_) { + if (cached_program.MatchInCache(device_name, precision, routine_name)) { + program_cache_mutex_.unlock(); + return true; + } + } + program_cache_mutex_.unlock(); + return false; +} + +// ================================================================================================= + +// Clears the cache of stored program binaries +StatusCode ClearCompiledProgramCache() { + program_cache_mutex_.lock(); + program_cache_.clear(); + program_cache_mutex_.unlock(); + return StatusCode::kSuccess; +} + +// ================================================================================================= +} // namespace cache +} // namespace clblast diff --git a/src/clblast.cc b/src/clblast.cc index 145b6bf6..b6efd185 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -17,6 +17,7 @@ #include "clblast.h" #include "internal/public_api.h" +#include "internal/cache.h" // BLAS level-1 includes #include "internal/routines/level1/xswap.h" @@ -1788,4 +1789,9 @@ template StatusCode PUBLIC_API Trsm<double2>(const Layout, const Side, const Tri cl_command_queue*, cl_event*); // ================================================================================================= + +// Clears the cache of stored program binaries +StatusCode ClearCompiledProgramCache() { return cache::ClearCompiledProgramCache(); } + +// ================================================================================================= } // namespace clblast diff --git a/src/clblast_c.cc b/src/clblast_c.cc index 23c96feb..6e238b77 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -2258,3 +2258,10 @@ StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle tri } // ================================================================================================= + +// Clears the cache of stored program binaries +StatusCode CLBlastClearCompiledProgramCache() { + return static_cast<StatusCode>(clblast::ClearCompiledProgramCache()); +} + +// ================================================================================================= diff --git a/src/routine.cc b/src/routine.cc index b5ba63eb..e0a75e41 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -13,17 +13,12 @@ #include <string> #include <vector> -#include <mutex> #include "internal/routine.h" namespace clblast { // ================================================================================================= -// The cache of compiled OpenCL programs and its mutex for thread safety -template <typename T> std::vector<typename Routine<T>::ProgramCache> Routine<T>::program_cache_; -template <typename T> std::mutex Routine<T>::program_cache_mutex_; - // Constructor: not much here, because no status codes can be returned template <typename T> Routine<T>::Routine(Queue &queue, EventPointer event, const std::string &name, @@ -102,9 +97,7 @@ StatusCode Routine<T>::SetUp() { if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; } // Store the compiled program in the cache (atomic for thread-safety) - program_cache_mutex_.lock(); - program_cache_.push_back({program, device_name_, precision_, routine_name_}); - program_cache_mutex_.unlock(); + StoreProgramToCache(program); } catch (...) { return StatusCode::kBuildProgramFailure; } } @@ -374,37 +367,6 @@ StatusCode Routine<T>::PadCopyTransposeMatrix(EventPointer event, std::vector<Ev // ================================================================================================= -// Queries the cache and retrieves a matching program. Assumes that the match is available, throws -// otherwise. -template <typename T> -const Program& Routine<T>::GetProgramFromCache() const { - program_cache_mutex_.lock(); - for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { - program_cache_mutex_.unlock(); - return cached_program.program; - } - } - program_cache_mutex_.unlock(); - throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none."); -} - -// Queries the cache to see whether or not the compiled kernel is already there -template <typename T> -bool Routine<T>::ProgramIsInCache() const { - program_cache_mutex_.lock(); - for (auto &cached_program: program_cache_) { - if (cached_program.MatchInCache(device_name_, precision_, routine_name_)) { - program_cache_mutex_.unlock(); - return true; - } - } - program_cache_mutex_.unlock(); - return false; -} - -// ================================================================================================= - // Compiles the templated class template class Routine<float>; template class Routine<double>; |