summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2018-10-10 19:12:42 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl>, 2018-10-10 19:12:42 +0200
commit: 1736c0cef497335beb0cc98c6995d29a6e88137b (patch)
tree: d2ca9c763ff7e0b96f41973dea4e6cb3d3306b35
parent: c163868e1822a97750b4380f0d9cdd38369f9f0b (diff)
Fixed pre-processor warnings related to the subgroup shuffling
-rw-r--r-- src/kernel_preprocessor.cpp 25
-rw-r--r-- src/kernels/level3/xgemm_part1.opencl 8
2 files changed, 30 insertions, 3 deletions
diff --git a/src/kernel_preprocessor.cpp b/src/kernel_preprocessor.cpp
index 1c422d33..bc2ab540 100644
--- a/src/kernel_preprocessor.cpp
+++ b/src/kernel_preprocessor.cpp
@@ -182,6 +182,31 @@ bool EvaluateCondition(std::string condition,
const auto right = condition.substr(equal_pos + 4);
return (left == right);
}
+
+ // Process the not equal sign
+ const auto not_equal_pos = condition.find(" != ");
+ if (not_equal_pos != std::string::npos) {
+ const auto left = condition.substr(0, not_equal_pos);
+ const auto right = condition.substr(not_equal_pos + 4);
+ return (left != right);
+ }
+
+ // Process the smaller than sign
+ const auto smaller_than_pos = condition.find(" < ");
+ if (smaller_than_pos != std::string::npos) {
+ const auto left = condition.substr(0, smaller_than_pos);
+ const auto right = condition.substr(smaller_than_pos + 3);
+ return (left < right);
+ }
+
+ // Process the larger than sign
+ const auto larger_than_pos = condition.find(" > ");
+ if (larger_than_pos != std::string::npos) {
+ const auto left = condition.substr(0, larger_than_pos);
+ const auto right = condition.substr(larger_than_pos + 3);
+ return (left > right);
+ }
+
printf("Warning unknown condition: %s\n", condition.c_str());
return false; // unknown error
}
diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index 80a60107..cc03696e 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -126,13 +126,15 @@ R"(
#endif
// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt)
-#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL
+#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL == 1
#define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs
#endif
// NVIDIA warps as subgroups using inline PTX (https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html)
-#if USE_SUBGROUP_SHUFFLING == 1 && (SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA)
- #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs
+#if USE_SUBGROUP_SHUFFLING == 1
+ #if SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA == 1 || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA == 1
+ #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs
+ #endif
#endif
#if NWI != SUBGROUP_SIZE || MDIMC < SUBGROUP_SIZE