diff options
Diffstat (limited to 'src/routine.cc')
-rw-r--r-- | src/routine.cc | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/src/routine.cc b/src/routine.cc index 5f9b1c89..11c4281e 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -88,12 +88,21 @@ StatusCode Routine<T>::SetUp() { // Adds the name of the routine as a define defines += "#define ROUTINE_"+routine_name_+"\n"; + // Determines whether this is a specific device + const auto isAMD = device_.Vendor() == "AMD" || device_.Vendor() == "Advanced Micro Devices, Inc."; + const auto isGPU = device_.Type() == "GPU"; + // For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve // performance, but might result in a reduced accuracy. - if (device_.Vendor() == "AMD") { + if (isAMD && isGPU) { defines += "#define USE_CL_MAD 1\n"; } + // For specific devices, use staggered/shuffled workgroup indices. + if (isAMD && isGPU) { + defines += "#define USE_STAGGERED_INDICES 1\n"; + } + // Combines everything together into a single source string auto source_string = defines + common_header + source_string_; |