diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-07-23 19:43:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-23 19:43:03 +0200 |
commit | f8fb707fa440d1ce8b319bec8efe3c20d21dcd37 (patch) | |
tree | 6ef6563e81559ae728c80b77ec5375e5c52ee444 /src/utilities/compile.cpp | |
parent | db179a1e40a3ea37e5dd8ad293aad9b651947668 (diff) | |
parent | 0772d63498c8eeddc380902ba6010a1a861763cc (diff) |
Merge pull request #297 from tyler-utah/master
inline PTX to support subgroup shuffle for Nvidia GPUs
Diffstat (limited to 'src/utilities/compile.cpp')
-rw-r--r-- | src/utilities/compile.cpp | 14 |
1 file changed, 14 insertions, 0 deletions
```diff
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 05c29944..835f54b4 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -61,8 +61,22 @@ std::shared_ptr<Program> CompileFromSource(
   // For Intel GPUs with subgroup support, use subgroup shuffling.
   if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
     header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+    header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
   }
 
+  // For NVIDIA GPUs, inline PTX can provide subgroup support
+  if (device.IsGPU() && device.IsNVIDIA() && precision == Precision::kSingle) {
+    header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+
+    // Nvidia needs to check pre or post volta due to new shuffle commands
+    if (device.IsPostNVIDIAVolta()) {
+      header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 1\n";
+    }
+    else {
+      header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 1\n";
+    }
+  }
+
   // Optionally adds a translation header from OpenCL kernels to CUDA kernels
   #ifdef CUDA_API
     header_string +=
```