From 2b1e0295e6dba8d8d9f85ca65b6232a89e6cceae Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Tue, 24 Apr 2018 20:41:15 +0200
Subject: Added a define to enable subgroup shuffling if supported by the device

---
 src/utilities/compile.cpp   | 5 +++++
 src/utilities/utilities.hpp | 1 +
 2 files changed, 6 insertions(+)

(limited to 'src')

diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index c1d921a4..65131cca 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -57,6 +57,11 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
     header_string += "#define GLOBAL_MEM_FENCE 1\n";
   }
 
+  // For Intel GPUs with subgroup support, use subgroup shuffling.
+  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+    header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+  }
+
   // Optionally adds a translation header from OpenCL kernels to CUDA kernels
   #ifdef CUDA_API
     header_string +=
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index d382b331..0edf77fe 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -47,6 +47,7 @@ using double2 = std::complex<double>;
 // Khronos OpenCL extensions
 const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query";
 const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query";
+const std::string kKhronosIntelSubgroups = "cl_intel_subgroups";
 
 // Catched an unknown error
 constexpr auto kUnknownError = -999;
-- 
cgit v1.2.3