summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-04-24 20:41:15 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-04-24 20:41:15 +0200
commit 2b1e0295e6dba8d8d9f85ca65b6232a89e6cceae (patch)
tree fb8acd7a24e9ce79cdb81b31ec8b63ccdc0bebeb /src
parent 5d46a3193e9034ba567950f166f4ab2de326fee0 (diff)
Added a define to enable subgroup shuffling if supported by the device
Diffstat (limited to 'src')
-rw-r--r-- src/utilities/compile.cpp 5
-rw-r--r-- src/utilities/utilities.hpp 1
2 files changed, 6 insertions, 0 deletions
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index c1d921a4..65131cca 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -57,6 +57,11 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
header_string += "#define GLOBAL_MEM_FENCE 1\n";
}
+ // For Intel GPUs with subgroup support, use subgroup shuffling.
+ if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+ header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+ }
+
// Optionally adds a translation header from OpenCL kernels to CUDA kernels
#ifdef CUDA_API
header_string +=
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index d382b331..0edf77fe 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -47,6 +47,7 @@ using double2 = std::complex<double>;
// Khronos OpenCL extensions
const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query";
const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query";
+const std::string kKhronosIntelSubgroups = "cl_intel_subgroups";
// Catched an unknown error
constexpr auto kUnknownError = -999;