diff options
author | Tyler Sorensen <tylersorensen3221@hotmail.com> | 2018-07-11 15:12:22 -0400 |
---|---|---|
committer | Tyler Sorensen <tylersorensen3221@hotmail.com> | 2018-07-11 15:12:22 -0400 |
commit | 7f2e98a1406da6c5293f0c988df95edc246ef88d (patch) | |
tree | 5fdb94393d54cff495f7f2e6a2f0edc6df1eebd8 /src/utilities | |
parent | 7bae54f61f8a2b589421afd57c9da6c8775155ef (diff) |
added inline ptx to support shuffle on Nvidia GPUs
Diffstat (limited to 'src/utilities')
-rw-r--r-- | src/utilities/compile.cpp | 22 |
1 files changed, 19 insertions, 3 deletions
```diff
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 05c29944..cd0b3d2b 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -58,11 +58,27 @@ std::shared_ptr<Program> CompileFromSource(
     header_string += "#define GLOBAL_MEM_FENCE 1\n";
   }
 
-  // For Intel GPUs with subgroup support, use subgroup shuffling.
-  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+  // For GPUs with subgroup support, use subgroup shuffling.
+  // Currently these are Intel via an extension and Nvidia using inline PTX (restricted to 32 bit)
+  if (device.IsGPU() && (device.HasExtension(kKhronosIntelSubgroups) ||
+                         (device.IsNVIDIA() && static_cast<int>(precision) == 32))) {
     header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
-  }
 
+    // Define the flavor of subgroup
+    if (device.IsNVIDIA()) {
+      header_string += "#define NVIDIA_WARPS_AS_SUBGROUPS 1\n";
+
+      // Nvidia additionally needs to check pre or post volta due to new
+      // shuffle commands
+      if (device.IsPostNVIDIAVolta()) {
+        header_string += "#define NVIDIA_POST_VOLTA 1\n";
+      }
+    }
+    else if (device.HasExtension(kKhronosIntelSubgroups)) {
+      header_string += "#define INTEL_SUBGROUP_EXTENSION 1\n";
+    }
+  }
+
   // Optionally adds a translation header from OpenCL kernels to CUDA kernels
   #ifdef CUDA_API
     header_string +=
```