diff options
author | Tyler Sorensen <tylersorensen3221@hotmail.com> | 2018-07-14 19:50:47 -0400 |
---|---|---|
committer | Tyler Sorensen <tylersorensen3221@hotmail.com> | 2018-07-14 19:50:47 -0400 |
commit | 7709a7308bce5492e06d8867a4dd9dff5b2ba950 (patch) | |
tree | ed35acf41257752ec165480c2298edf17080da4c /src/utilities | |
parent | 36093429fd444d0a1fc7de25dfaf7f2f775cfabc (diff) |
Applied feedback from Cedric from first pull request
Diffstat (limited to 'src/utilities')
-rw-r--r-- | src/utilities/compile.cpp | 26 |
1 files changed, 12 insertions, 14 deletions
```diff
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index cd0b3d2b..835f54b4 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -58,24 +58,22 @@ std::shared_ptr<Program> CompileFromSource(
     header_string += "#define GLOBAL_MEM_FENCE 1\n";
   }
 
-  // For GPUs with subgroup support, use subgroup shuffling.
-  // Currently these are Intel via an extension and Nvidia using inline PTX (restricted to 32 bit)
-  if (device.IsGPU() && (device.HasExtension(kKhronosIntelSubgroups) ||
-      (device.IsNVIDIA() && static_cast<int>(precision) == 32))) {
+  // For Intel GPUs with subgroup support, use subgroup shuffling.
+  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
     header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+    header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
+  }
 
-    // Define the flavor of subgroup
-    if (device.IsNVIDIA()) {
-      header_string += "#define NVIDIA_WARPS_AS_SUBGROUPS 1\n";
+  // For NVIDIA GPUs, inline PTX can provide subgroup support
+  if (device.IsGPU() && device.IsNVIDIA() && precision == Precision::kSingle) {
+    header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
 
-      // Nvidia additionally needs to check pre or post volta due to new
-      // shuffle commands
-      if (device.IsPostNVIDIAVolta()) {
-        header_string += "#define NVIDIA_POST_VOLTA 1\n";
-      }
+    // Nvidia needs to check pre or post volta due to new shuffle commands
+    if (device.IsPostNVIDIAVolta()) {
+      header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 1\n";
     }
-    else if (device.HasExtension(kKhronosIntelSubgroups)) {
-      header_string += "#define INTEL_SUBGROUP_EXTENSION 1\n";
+    else {
+      header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 1\n";
     }
   }
 
```