summary | refs | log | tree | commit | diff
path: root/src/utilities
diff options
context:
space:
mode:
author: Tyler Sorensen <tylersorensen3221@hotmail.com> 2018-07-14 19:50:47 -0400
committer: Tyler Sorensen <tylersorensen3221@hotmail.com> 2018-07-14 19:50:47 -0400
commit: 7709a7308bce5492e06d8867a4dd9dff5b2ba950 (patch)
tree: ed35acf41257752ec165480c2298edf17080da4c /src/utilities
parent: 36093429fd444d0a1fc7de25dfaf7f2f775cfabc (diff)
Applied feedback from Cedric from first pull request
Diffstat (limited to 'src/utilities')
-rw-r--r-- src/utilities/compile.cpp 26
1 files changed, 12 insertions, 14 deletions
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index cd0b3d2b..835f54b4 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -58,24 +58,22 @@ std::shared_ptr<Program> CompileFromSource(
header_string += "#define GLOBAL_MEM_FENCE 1\n";
}
- // For GPUs with subgroup support, use subgroup shuffling.
- // Currently these are Intel via an extension and Nvidia using inline PTX (restricted to 32 bit)
- if (device.IsGPU() && (device.HasExtension(kKhronosIntelSubgroups) ||
- (device.IsNVIDIA() && static_cast<int>(precision) == 32))) {
+ // For Intel GPUs with subgroup support, use subgroup shuffling.
+ if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
+ header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
+ }
- // Define the flavor of subgroup
- if (device.IsNVIDIA()) {
- header_string += "#define NVIDIA_WARPS_AS_SUBGROUPS 1\n";
+ // For NVIDIA GPUs, inline PTX can provide subgroup shuffling (single precision only).
+ if (device.IsGPU() && device.IsNVIDIA() && precision == Precision::kSingle) {
+ header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
- // Nvidia additionally needs to check pre or post volta due to new
- // shuffle commands
- if (device.IsPostNVIDIAVolta()) {
- header_string += "#define NVIDIA_POST_VOLTA 1\n";
- }
+ // NVIDIA devices must be split into pre- and post-Volta because of the new shuffle commands.
+ if (device.IsPostNVIDIAVolta()) {
+ header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 1\n";
}
- else if (device.HasExtension(kKhronosIntelSubgroups)) {
- header_string += "#define INTEL_SUBGROUP_EXTENSION 1\n";
+ else {
+ header_string += "#define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 1\n";
}
}