From 7709a7308bce5492e06d8867a4dd9dff5b2ba950 Mon Sep 17 00:00:00 2001
From: Tyler Sorensen
Date: Sat, 14 Jul 2018 19:50:47 -0400
Subject: Applied feedback from Cedric from first pull request

---
 src/kernels/level3/xgemm_part1.opencl | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'src/kernels/level3/xgemm_part1.opencl')

diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index 9e483b3e..32386312 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -114,26 +114,27 @@ R"(
   #define GLOBAL_MEM_FENCE 0 // Global synchronisation barrier for potential better performance
 #endif

-#ifndef NVIDIA_WARPS_AS_SUBGROUPS
-  #define NVIDIA_WARPS_AS_SUBGROUPS 0
+#ifndef SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA
+  #define SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA 0
 #endif
-#ifndef NVIDIA_POST_VOLTA
-  #define NVIDIA_POST_VOLTA 0
+#ifndef SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA
+  #define SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA 0
 #endif
-#ifndef INTEL_SUBGROUP_EXTENSION
-  #define INTEL_SUBGROUP_EXTENSION 0
+#ifndef SUBGROUP_SHUFFLING_INTEL
+  #define SUBGROUP_SHUFFLING_INTEL 0
 #endif
-//#ifndef USE_SUBGROUP_SHUFFLING
+#ifndef USE_SUBGROUP_SHUFFLING
   #define USE_SUBGROUP_SHUFFLING 0 // Optionally enables subgroup shuffling for Intel GPUs
-//#endif
+#endif

 // Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt)
-#if USE_SUBGROUP_SHUFFLING == 1 && INTEL_SUBGROUP_EXTENSION
+#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL
   #define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs
 #endif

 // NVIDIA warps as subgroups using inline PTX (https://docs.nvidia.com/cuda/inline-ptx-assembly/index.html)
-#if USE_SUBGROUP_SHUFFLING == 1 && NVIDIA_WARPS_AS_SUBGROUPS
+#if USE_SUBGROUP_SHUFFLING == 1 && (SUBGROUP_SHUFFLING_NVIDIA_PRE_VOLTA || \
+                                    SUBGROUP_SHUFFLING_NVIDIA_POST_VOLTA)
   #define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on NVIDIA GPUs
 #endif

--
cgit v1.2.3