summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCNugteren <web@cedricnugteren.nl>2015-07-16 22:42:02 +0200
committerCNugteren <web@cedricnugteren.nl>2015-07-16 22:42:02 +0200
commit0157d6d4ea50a789457637f98a373d31b62c1496 (patch)
treeba102720811cfa93b42a0595ada20866c721d286 /src
parent3bb1b5fa6e26ca95065ac68a48f4b0a51870fe88 (diff)
Using mad() instruction for AMD devices like clBLAS does
Diffstat (limited to 'src')
-rw-r--r--src/kernels/common.opencl7
-rw-r--r--src/routine.cc8
2 files changed, 13 insertions, 2 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 0d29c7a6..2e1d8f90 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -82,8 +82,11 @@ R"(
// =================================================================================================
-// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction
-#define USE_CL_MAD 0
+// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction by default. For specific
+// devices, it is enabled (see src/routine.cc).
+#ifndef USE_CL_MAD
+ #define USE_CL_MAD 0
+#endif
// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464
diff --git a/src/routine.cc b/src/routine.cc
index 339027d4..27bfa8f9 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -68,6 +68,14 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
// Collects the parameters for this device in the form of defines, and adds the precision
auto defines = db_.GetDefines();
defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+
+ // For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
+ // performance, but might result in reduced accuracy.
+ if (device_.Vendor() == "AMD") {
+ defines += "#define USE_CL_MAD 1\n";
+ }
+
+ // Combines everything together into a single source string
auto source_string = defines + common_header + routine_source;
// Compiles the kernel