summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-10-10 19:12:42 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-10-10 19:12:42 +0200
commit 1736c0cef497335beb0cc98c6995d29a6e88137b (patch)
tree d2ca9c763ff7e0b96f41973dea4e6cb3d3306b35 /src/kernels
parent c163868e1822a97750b4380f0d9cdd38369f9f0b (diff)
Fixed pre-processor warnings related to the subgroup shuffling
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level3/xgemm_part1.opencl 8
1 files changed, 5 insertions, 3 deletions
diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index 80a60107..cc03696e 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -126,13 +126,15 @@ R"(
#endif
// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt)
-#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL
+#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL == 1
#define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs
#endif
// NVIDIA warps as subgroups using inline PTX (https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html)
-#if USE_SUBGROUP_SHUFFLING == 1 && (SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA)
- #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs
+#if USE_SUBGROUP_SHUFFLING == 1
+ #if SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA == 1 || SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA == 1
+ #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs
+ #endif
#endif
#if NWI != SUBGROUP_SIZE || MDIMC < SUBGROUP_SIZE