diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-01 12:56:08 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-01 12:56:08 +0200 |
commit | 9602c150aa3b7f0a392207bef8cbb6048b1da891 (patch) | |
tree | b9b6f2c2300d77427ee6121151efa00a11c60859 /src | |
parent | e113ff0852d21ecb898b3b192145b70cad3f338a (diff) |
Added a program cache (per-context) next to the per-device binary cache
Diffstat (limited to 'src')
-rw-r--r-- | src/cache.cc | 48 | ||||
-rw-r--r-- | src/routine.cc | 113 |
2 files changed, 108 insertions, 53 deletions
diff --git a/src/cache.cc b/src/cache.cc index 18731a51..4dbdb711 100644 --- a/src/cache.cc +++ b/src/cache.cc @@ -7,7 +7,7 @@ // Author(s): // Cedric Nugteren <www.cedricnugteren.nl> // -// This file implements the caching functionality of compiled binaries. +// This file implements the caching functionality of compiled binaries and programs. // // ================================================================================================= @@ -22,13 +22,21 @@ namespace cache { // ================================================================================================= // Stores the compiled binary or IR in the cache -void StoreBinaryToCache(const std::string& binary, const std::string &device_name, +void StoreBinaryToCache(const std::string &binary, const std::string &device_name, const Precision &precision, const std::string &routine_name) { binary_cache_mutex_.lock(); - binary_cache_.push_back({binary, device_name, precision, routine_name}); + binary_cache_.push_back(BinaryCache{binary, device_name, precision, routine_name}); binary_cache_mutex_.unlock(); } +// Stores the compiled program in the cache +void StoreProgramToCache(const Program &program, const Context &context, + const Precision &precision, const std::string &routine_name) { + program_cache_mutex_.lock(); + program_cache_.push_back(ProgramCache{program, context.pointer(), precision, routine_name}); + program_cache_mutex_.unlock(); +} + // Queries the cache and retrieves a matching binary. Assumes that the match is available, throws // otherwise. const std::string& GetBinaryFromCache(const std::string &device_name, const Precision &precision, @@ -44,6 +52,21 @@ const std::string& GetBinaryFromCache(const std::string &device_name, const Prec throw std::runtime_error("Internal CLBlast error: Expected binary in cache, but found none."); } +// Queries the cache and retrieves a matching program. Assumes that the match is available, throws +// otherwise. +const Program& GetProgramFromCache(const Context &context, const Precision &precision, + const std::string &routine_name) { + program_cache_mutex_.lock(); + for (auto &cached_program: program_cache_) { + if (cached_program.MatchInCache(context.pointer(), precision, routine_name)) { + program_cache_mutex_.unlock(); + return cached_program.program; + } + } + program_cache_mutex_.unlock(); + throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none."); +} + // Queries the cache to see whether or not the compiled kernel is already there bool BinaryIsInCache(const std::string &device_name, const Precision &precision, const std::string &routine_name) { @@ -58,13 +81,30 @@ bool BinaryIsInCache(const std::string &device_name, const Precision &precision, return false; } +// Queries the cache to see whether or not the compiled kernel is already there +bool ProgramIsInCache(const Context &context, const Precision &precision, + const std::string &routine_name) { + program_cache_mutex_.lock(); + for (auto &cached_program: program_cache_) { + if (cached_program.MatchInCache(context.pointer(), precision, routine_name)) { + program_cache_mutex_.unlock(); + return true; + } + } + program_cache_mutex_.unlock(); + return false; +} + // ================================================================================================= -// Clears the cache of stored binaries +// Clears the cache of stored binaries and programs StatusCode ClearCache() { binary_cache_mutex_.lock(); binary_cache_.clear(); binary_cache_mutex_.unlock(); + program_cache_mutex_.lock(); + program_cache_.clear(); + program_cache_mutex_.unlock(); return StatusCode::kSuccess; } diff --git a/src/routine.cc b/src/routine.cc index cd4d82fb..35d0653c 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -42,66 +42,81 @@ Routine<T>::Routine(Queue &queue, EventPointer event, const std::string &name, template <typename T> StatusCode Routine<T>::SetUp() { - // Queries the cache to see whether or not the compiled kernel is already there. If not, it will - // be built and added to the cache. - if (!ProgramIsInCache()) { - - // Inspects whether or not cl_khr_fp64 is supported in case of double precision - auto extensions = device_.Capabilities(); - if (precision_ == Precision::kDouble || precision_ == Precision::kComplexDouble) { - if (extensions.find(kKhronosDoublePrecision) == std::string::npos) { - return StatusCode::kNoDoublePrecision; - } - } - - // As above, but for cl_khr_fp16 (half precision) - if (precision_ == Precision::kHalf) { - if (extensions.find(kKhronosHalfPrecision) == std::string::npos) { - return StatusCode::kNoHalfPrecision; - } - } + // Queries the cache to see whether or not the program (context-specific) is already there + if (ProgramIsInCache()) { return StatusCode::kSuccess; } - // Loads the common header (typedefs and defines and such) - std::string common_header = - #include "kernels/common.opencl" - ; + // Queries the cache to see whether or not the binary (device-specific) is already there. If it + // is, a program is created and stored in the cache + if (BinaryIsInCache()) { + try { + auto& binary = cache::GetBinaryFromCache(device_name_, precision_, routine_name_); + auto program = Program(device_, context_, binary); + auto options = std::vector<std::string>(); + program.Build(device_, options); + StoreProgramToCache(program); + } catch (...) { return StatusCode::kBuildProgramFailure; } + return StatusCode::kSuccess; + } - // Collects the parameters for this device in the form of defines, and adds the precision - auto defines = db_.GetDefines(); - defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; + // Otherwise, the kernel will be compiled and program will be built. Both the binary and the + // program will be added to the cache. - // Adds the name of the routine as a define - defines += "#define ROUTINE_"+routine_name_+"\n"; + // Inspects whether or not cl_khr_fp64 is supported in case of double precision + auto extensions = device_.Capabilities(); + if (precision_ == Precision::kDouble || precision_ == Precision::kComplexDouble) { + if (extensions.find(kKhronosDoublePrecision) == std::string::npos) { + return StatusCode::kNoDoublePrecision; + } + } - // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve - // performance, but might result in a reduced accuracy. - if (device_.Vendor() == "AMD") { - defines += "#define USE_CL_MAD 1\n"; + // As above, but for cl_khr_fp16 (half precision) + if (precision_ == Precision::kHalf) { + if (extensions.find(kKhronosHalfPrecision) == std::string::npos) { + return StatusCode::kNoHalfPrecision; } + } - // Combines everything together into a single source string - auto source_string = defines + common_header + source_string_; + // Loads the common header (typedefs and defines and such) + std::string common_header = + #include "kernels/common.opencl" + ; - // Compiles the kernel - try { - auto program = Program(context_, source_string); - auto options = std::vector<std::string>(); - auto build_status = program.Build(device_, options); + // Collects the parameters for this device in the form of defines, and adds the precision + auto defines = db_.GetDefines(); + defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; - // Checks for compiler crashes/errors/warnings - if (build_status == BuildStatus::kError) { - auto message = program.GetBuildInfo(device_); - fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str()); - return StatusCode::kBuildProgramFailure; - } - if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; } + // Adds the name of the routine as a define + defines += "#define ROUTINE_"+routine_name_+"\n"; - // Store the compiled kernel in the cache - auto binary = program.GetIR(); - StoreBinaryToCache(binary); - } catch (...) { return StatusCode::kBuildProgramFailure; } + // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve + // performance, but might result in a reduced accuracy. + if (device_.Vendor() == "AMD") { + defines += "#define USE_CL_MAD 1\n"; } + // Combines everything together into a single source string + auto source_string = defines + common_header + source_string_; + + // Compiles the kernel + try { + auto program = Program(context_, source_string); + auto options = std::vector<std::string>(); + auto build_status = program.Build(device_, options); + + // Checks for compiler crashes/errors/warnings + if (build_status == BuildStatus::kError) { + auto message = program.GetBuildInfo(device_); + fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str()); + return StatusCode::kBuildProgramFailure; + } + if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; } + + // Store the compiled binary and program in the cache + const auto binary = program.GetIR(); + StoreBinaryToCache(binary); + StoreProgramToCache(program); + } catch (...) { return StatusCode::kBuildProgramFailure; } + // No errors, normal termination of this function return StatusCode::kSuccess; } |