summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-09-15 16:53:09 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-09-15 16:53:09 +0200
commit: 8ac39fa3310ba4a66992ccfce839195c31acf688 (patch)
tree: 8ff196acbd1e2ec681d96e2290d3223624435e86
parent: 51cc346751528d58d7edf656b710ce4b5ae40fd5 (diff)
Disabled Intel subgroup shuffling for double-precision
-rw-r--r-- CHANGELOG 1
-rw-r--r-- src/utilities/compile.cpp 3
2 files changed, 3 insertions, 1 deletion
diff --git a/CHANGELOG b/CHANGELOG
index f2960fde..27860c85 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,6 +5,7 @@ Development (next version)
- The tuners now check beforehand on invalid local thread sizes and skip those completely
- Fixed an issue with conjugate transpose not being executed in certain cases for a.o. XOMATCOPY
- Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
+- Fixed an issue with the preprocessor and the new GEMMK == 1 kernel
- Various minor fixes and enhancements
Version 1.4.1
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 835f54b4..00cb90cb 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -59,7 +59,8 @@ std::shared_ptr<Program> CompileFromSource(
}
// For Intel GPUs with subgroup support, use subgroup shuffling.
- if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+ if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups) &&
+ (precision == Precision::kSingle || precision == Precision::kHalf)) {
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
}