summaryrefslogtreecommitdiff
path: root/src/routine.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/routine.cc')
-rw-r--r-- src/routine.cc 11
1 files changed, 10 insertions, 1 deletions
diff --git a/src/routine.cc b/src/routine.cc
index 5f9b1c89..11c4281e 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -88,12 +88,21 @@ StatusCode Routine<T>::SetUp() {
// Adds the name of the routine as a define
defines += "#define ROUTINE_"+routine_name_+"\n";
+ // Determines whether this is a specific device
+ const auto isAMD = device_.Vendor() == "AMD" || device_.Vendor() == "Advanced Micro Devices, Inc.";
+ const auto isGPU = device_.Type() == "GPU";
+
// For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
// performance, but might result in reduced accuracy.
- if (device_.Vendor() == "AMD") {
+ if (isAMD && isGPU) {
defines += "#define USE_CL_MAD 1\n";
}
+ // For specific devices, use staggered/shuffled workgroup indices.
+ if (isAMD && isGPU) {
+ defines += "#define USE_STAGGERED_INDICES 1\n";
+ }
+
// Combines everything together into a single source string
auto source_string = defines + common_header + source_string_;