diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-24 20:41:15 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-24 20:41:15 +0200 |
commit | 2b1e0295e6dba8d8d9f85ca65b6232a89e6cceae (patch) | |
tree | fb8acd7a24e9ce79cdb81b31ec8b63ccdc0bebeb /src/utilities/compile.cpp | |
parent | 5d46a3193e9034ba567950f166f4ab2de326fee0 (diff) |
Added a define to enable subgroup shuffling if supported by the device
Diffstat (limited to 'src/utilities/compile.cpp')
-rw-r--r-- | src/utilities/compile.cpp | 5 |
1 file changed, 5 insertions, 0 deletions
```diff
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index c1d921a4..65131cca 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -57,6 +57,11 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
     header_string += "#define GLOBAL_MEM_FENCE 1\n";
   }
 
+  // For Intel GPUs with subgroup support, use subgroup shuffling.
+  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+    header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+  }
+
   // Optionally adds a translation header from OpenCL kernels to CUDA kernels
   #ifdef CUDA_API
     header_string +=
```