diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-09-15 16:53:09 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-09-15 16:53:09 +0200 |
commit | 8ac39fa3310ba4a66992ccfce839195c31acf688 (patch) | |
tree | 8ff196acbd1e2ec681d96e2290d3223624435e86 | |
parent | 51cc346751528d58d7edf656b710ce4b5ae40fd5 (diff) |
Disabled Intel subgroup shuffling for double-precision
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | src/utilities/compile.cpp | 3 |
2 files changed, 3 insertions, 1 deletions
@@ -5,6 +5,7 @@ Development (next version)
 - The tuners now check beforehand on invalid local thread sizes and skip those completely
 - Fixed an issue with conjugate transpose not being executed in certain cases for a.o. XOMATCOPY
 - Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
+- Fixed an issue with the preprocessor and the new GEMMK == 1 kernel
 - Various minor fixes and enhancements
 
 Version 1.4.1
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 835f54b4..00cb90cb 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -59,7 +59,8 @@ std::shared_ptr<Program> CompileFromSource(
   }
 
   // For Intel GPUs with subgroup support, use subgroup shuffling.
-  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
+  if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups) &&
+      (precision == Precision::kSingle || precision == Precision::kHalf)) {
     header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
     header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
   }