diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-24 21:32:42 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-24 21:32:42 +0200 |
commit | 2965b87dda80ca22bf12527755ef9f3cb5fed46b (patch) | |
tree | f8ccc7a509eb36fd96908ba53060baa326238f8c /src/kernels/level3/xgemm_part1.opencl | |
parent | 2b1e0295e6dba8d8d9f85ca65b6232a89e6cceae (diff) |
Added Intel subgroup shuffle support to the 2D register caching GEMM kernel
Diffstat (limited to 'src/kernels/level3/xgemm_part1.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_part1.opencl | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index d15dafc8..99d64c91 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -114,6 +114,18 @@ R"(
   #define GLOBAL_MEM_FENCE 0 // Global synchronisation barrier for potential better performance
 #endif
 
+// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.txt)
+#ifndef USE_SUBGROUP_SHUFFLING
+  #define USE_SUBGROUP_SHUFFLING 0 // Optionally enables subgroup shuffling for Intel GPUs
+#endif
+#if USE_SUBGROUP_SHUFFLING == 1
+  #define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs
+#endif
+#if NWI != SUBGROUP_SIZE || MDIMC < SUBGROUP_SIZE
+  #undef USE_SUBGROUP_SHUFFLING
+  #define USE_SUBGROUP_SHUFFLING 0 // Disables subgroups in case the assumptions don't hold
+#endif
+
 // =================================================================================================
 
 // Data-widths in dimension M