summaryrefslogtreecommitdiff
path: root/src/routine.cpp
diff options
context:
space:
mode:
authorIvan Shapovalov <intelfx@intelfx.name>2016-10-18 04:53:06 +0300
committerIvan Shapovalov <intelfx@intelfx.name>2016-10-22 08:45:27 +0300
commit56f300607b1d0b81ab3269894fda5a066c46cdeb (patch)
tree60b4c5566cc4bbfad15a7791a4e20c2a60e16707 /src/routine.cpp
parentb98af44fcf89b9946e1de438b1f5527e6bf28905 (diff)
Routine: get rid of ::SetUp()
Since we now use C++ exceptions inside the implementation (and exceptions can be thrown from constructors), there is no need for a separate Routine::SetUp() function. For this, we also change the way how the kernel source string is constructed. The kernel-specific source code is now passed to the Routine ctor via an initializer_list of C strings to avoid unnecessary data copying while also working around C1091 of MSVC 2013.
Diffstat (limited to 'src/routine.cpp')
-rw-r--r--src/routine.cpp39
1 files changed, 18 insertions, 21 deletions
diff --git a/src/routine.cpp b/src/routine.cpp
index 5e3a9dfe..acafb0d2 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -21,10 +21,11 @@
namespace clblast {
// =================================================================================================
-// Constructor: not much here, because no status codes can be returned
+// The constructor does all heavy work, errors are returned as exceptions
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase):
+ const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ std::initializer_list<const char *> source):
precision_(precision),
routine_name_(name),
queue_(queue),
@@ -33,12 +34,6 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
device_(queue_.GetDevice()),
device_name_(device_.Name()),
db_(queue_, routines, precision_, userDatabase) {
-}
-
-// =================================================================================================
-
-// Separate set-up function to allow for status codes to be returned
-void Routine::SetUp() {
// Queries the cache to see whether or not the program (context-specific) is already there
if (ProgramIsInCache(context_, precision_, routine_name_)) { return; }
@@ -77,37 +72,39 @@ void Routine::SetUp() {
}
}
- // Loads the common header (typedefs and defines and such)
- std::string common_header =
- #include "kernels/common.opencl"
- ;
-
// Collects the parameters for this device in the form of defines, and adds the precision
- auto defines = db_.GetDefines();
- defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ auto source_string = db_.GetDefines();
+ source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
// Adds the name of the routine as a define
- defines += "#define ROUTINE_"+routine_name_+"\n";
+ source_string += "#define ROUTINE_"+routine_name_+"\n";
// For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve
// performance, but might result in a reduced accuracy.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_CL_MAD 1\n";
+ source_string += "#define USE_CL_MAD 1\n";
}
// For specific devices, use staggered/shuffled workgroup indices.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_STAGGERED_INDICES 1\n";
+ source_string += "#define USE_STAGGERED_INDICES 1\n";
}
// For specific devices add a global synchronisation barrier to the GEMM kernel to optimize
// performance through better cache behaviour
if (device_.IsARM() && device_.IsGPU()) {
- defines += "#define GLOBAL_MEM_FENCE 1\n";
+ source_string += "#define GLOBAL_MEM_FENCE 1\n";
}
- // Combines everything together into a single source string
- const auto source_string = defines + common_header + source_string_;
+ // Loads the common header (typedefs and defines and such)
+ source_string +=
+ #include "kernels/common.opencl"
+ ;
+
+ // Adds routine-specific code to the constructed source string
+ for (const char *s: source) {
+ source_string += s;
+ }
// Prints details of the routine to compile in case of debugging in verbose mode
#ifdef VERBOSE