diff options
author | Ivan Shapovalov <intelfx@intelfx.name> | 2016-10-18 04:53:06 +0300 |
---|---|---|
committer | Ivan Shapovalov <intelfx@intelfx.name> | 2016-10-22 08:45:27 +0300 |
commit | 56f300607b1d0b81ab3269894fda5a066c46cdeb (patch) | |
tree | 60b4c5566cc4bbfad15a7791a4e20c2a60e16707 /src/routine.cpp | |
parent | b98af44fcf89b9946e1de438b1f5527e6bf28905 (diff) |
Routine: get rid of ::SetUp()
Since we now use C++ exceptions inside the implementation (and exceptions
can be thrown from constructors), there is no need for a separate
Routine::SetUp() function.
To make this possible, we also change how the kernel source string is constructed.
The kernel-specific source code is now passed to the Routine ctor via
an initializer_list of C strings, which avoids unnecessary data copying
and also works around MSVC 2013 fatal error C1091 (string literal too long).
Diffstat (limited to 'src/routine.cpp')
-rw-r--r-- | src/routine.cpp | 39 |
1 files changed, 18 insertions, 21 deletions
diff --git a/src/routine.cpp b/src/routine.cpp index 5e3a9dfe..acafb0d2 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -21,10 +21,11 @@ namespace clblast { // ================================================================================================= -// Constructor: not much here, because no status codes can be returned +// The constructor does all heavy work, errors are returned as exceptions Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector<std::string> &routines, const Precision precision, - const std::vector<const Database::DatabaseEntry*> &userDatabase): + const std::vector<const Database::DatabaseEntry*> &userDatabase, + std::initializer_list<const char *> source): precision_(precision), routine_name_(name), queue_(queue), @@ -33,12 +34,6 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name, device_(queue_.GetDevice()), device_name_(device_.Name()), db_(queue_, routines, precision_, userDatabase) { -} - -// ================================================================================================= - -// Separate set-up function to allow for status codes to be returned -void Routine::SetUp() { // Queries the cache to see whether or not the program (context-specific) is already there if (ProgramIsInCache(context_, precision_, routine_name_)) { return; } @@ -77,37 +72,39 @@ void Routine::SetUp() { } } - // Loads the common header (typedefs and defines and such) - std::string common_header = - #include "kernels/common.opencl" - ; - // Collects the parameters for this device in the form of defines, and adds the precision - auto defines = db_.GetDefines(); - defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; + auto source_string = db_.GetDefines(); + source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n"; // Adds the name of the routine as a define - defines += "#define ROUTINE_"+routine_name_+"\n"; + source_string += 
"#define ROUTINE_"+routine_name_+"\n"; // For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_CL_MAD 1\n"; + source_string += "#define USE_CL_MAD 1\n"; } // For specific devices, use staggered/shuffled workgroup indices. if (device_.IsAMD() && device_.IsGPU()) { - defines += "#define USE_STAGGERED_INDICES 1\n"; + source_string += "#define USE_STAGGERED_INDICES 1\n"; } // For specific devices add a global synchronisation barrier to the GEMM kernel to optimize // performance through better cache behaviour if (device_.IsARM() && device_.IsGPU()) { - defines += "#define GLOBAL_MEM_FENCE 1\n"; + source_string += "#define GLOBAL_MEM_FENCE 1\n"; } - // Combines everything together into a single source string - const auto source_string = defines + common_header + source_string_; + // Loads the common header (typedefs and defines and such) + source_string += + #include "kernels/common.opencl" + ; + + // Adds routine-specific code to the constructed source string + for (const char *s: source) { + source_string += s; + } // Prints details of the routine to compile in case of debugging in verbose mode #ifdef VERBOSE |