summaryrefslogtreecommitdiff
path: root/src/routine.cc
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-05-01 12:56:08 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-05-01 12:56:08 +0200
commit9602c150aa3b7f0a392207bef8cbb6048b1da891 (patch)
treeb9b6f2c2300d77427ee6121151efa00a11c60859 /src/routine.cc
parente113ff0852d21ecb898b3b192145b70cad3f338a (diff)
Added a program cache (per-context) next to the per-device binary cache
Diffstat (limited to 'src/routine.cc')
-rw-r--r--src/routine.cc113
1 files changed, 64 insertions, 49 deletions
diff --git a/src/routine.cc b/src/routine.cc
index cd4d82fb..35d0653c 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -42,66 +42,81 @@ Routine<T>::Routine(Queue &queue, EventPointer event, const std::string &name,
template <typename T>
StatusCode Routine<T>::SetUp() {
- // Queries the cache to see whether or not the compiled kernel is already there. If not, it will
- // be built and added to the cache.
- if (!ProgramIsInCache()) {
-
- // Inspects whether or not cl_khr_fp64 is supported in case of double precision
- auto extensions = device_.Capabilities();
- if (precision_ == Precision::kDouble || precision_ == Precision::kComplexDouble) {
- if (extensions.find(kKhronosDoublePrecision) == std::string::npos) {
- return StatusCode::kNoDoublePrecision;
- }
- }
-
- // As above, but for cl_khr_fp16 (half precision)
- if (precision_ == Precision::kHalf) {
- if (extensions.find(kKhronosHalfPrecision) == std::string::npos) {
- return StatusCode::kNoHalfPrecision;
- }
- }
+ // Queries the cache to see whether or not the program (context-specific) is already there
+ if (ProgramIsInCache()) { return StatusCode::kSuccess; }
- // Loads the common header (typedefs and defines and such)
- std::string common_header =
- #include "kernels/common.opencl"
- ;
+ // Queries the cache to see whether or not the binary (device-specific) is already there. If it
+ // is, a program is created and stored in the cache
+ if (BinaryIsInCache()) {
+ try {
+ auto& binary = cache::GetBinaryFromCache(device_name_, precision_, routine_name_);
+ auto program = Program(device_, context_, binary);
+ auto options = std::vector<std::string>();
+ program.Build(device_, options);
+ StoreProgramToCache(program);
+ } catch (...) { return StatusCode::kBuildProgramFailure; }
+ return StatusCode::kSuccess;
+ }
- // Collects the parameters for this device in the form of defines, and adds the precision
- auto defines = db_.GetDefines();
- defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ // Otherwise, the kernel will be compiled and the program will be built. Both the binary and the
+ // program will be added to the cache.
- // Adds the name of the routine as a define
- defines += "#define ROUTINE_"+routine_name_+"\n";
+ // Inspects whether or not cl_khr_fp64 is supported in case of double precision
+ auto extensions = device_.Capabilities();
+ if (precision_ == Precision::kDouble || precision_ == Precision::kComplexDouble) {
+ if (extensions.find(kKhronosDoublePrecision) == std::string::npos) {
+ return StatusCode::kNoDoublePrecision;
+ }
+ }
- // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve
- // performance, but might result in a reduced accuracy.
- if (device_.Vendor() == "AMD") {
- defines += "#define USE_CL_MAD 1\n";
+ // As above, but for cl_khr_fp16 (half precision)
+ if (precision_ == Precision::kHalf) {
+ if (extensions.find(kKhronosHalfPrecision) == std::string::npos) {
+ return StatusCode::kNoHalfPrecision;
}
+ }
- // Combines everything together into a single source string
- auto source_string = defines + common_header + source_string_;
+ // Loads the common header (typedefs and defines and such)
+ std::string common_header =
+ #include "kernels/common.opencl"
+ ;
- // Compiles the kernel
- try {
- auto program = Program(context_, source_string);
- auto options = std::vector<std::string>();
- auto build_status = program.Build(device_, options);
+ // Collects the parameters for this device in the form of defines, and adds the precision
+ auto defines = db_.GetDefines();
+ defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
- // Checks for compiler crashes/errors/warnings
- if (build_status == BuildStatus::kError) {
- auto message = program.GetBuildInfo(device_);
- fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str());
- return StatusCode::kBuildProgramFailure;
- }
- if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
+ // Adds the name of the routine as a define
+ defines += "#define ROUTINE_"+routine_name_+"\n";
- // Store the compiled kernel in the cache
- auto binary = program.GetIR();
- StoreBinaryToCache(binary);
- } catch (...) { return StatusCode::kBuildProgramFailure; }
+ // For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
+ // performance, but might result in a reduced accuracy.
+ if (device_.Vendor() == "AMD") {
+ defines += "#define USE_CL_MAD 1\n";
}
+ // Combines everything together into a single source string
+ auto source_string = defines + common_header + source_string_;
+
+ // Compiles the kernel
+ try {
+ auto program = Program(context_, source_string);
+ auto options = std::vector<std::string>();
+ auto build_status = program.Build(device_, options);
+
+ // Checks for compiler crashes/errors/warnings
+ if (build_status == BuildStatus::kError) {
+ auto message = program.GetBuildInfo(device_);
+ fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str());
+ return StatusCode::kBuildProgramFailure;
+ }
+ if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
+
+ // Store the compiled binary and program in the cache
+ const auto binary = program.GetIR();
+ StoreBinaryToCache(binary);
+ StoreProgramToCache(program);
+ } catch (...) { return StatusCode::kBuildProgramFailure; }
+
// No errors, normal termination of this function
return StatusCode::kSuccess;
}