summaryrefslogtreecommitdiff
path: root/src/kernels/common.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-07-28 14:36:33 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-07-28 14:36:33 +0200
commit: 0f0baa561b6c215a1052b5c70d72215e2ab38745 (patch)
tree: 944ee44ac071bd8ca6da9f4a16b9b8c9bba1889f /src/kernels/common.opencl
parent: 03bed8633eade7b22e72389b36e2f63ad8f3809d (diff)
Disabled the use of staggered indices on AMD GPUs for the new GEMMK == 1 kernels to improve performance
Diffstat (limited to 'src/kernels/common.opencl')
-rw-r--r-- src/kernels/common.opencl 2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 4a476a8b..0ad38919 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -260,7 +260,7 @@ R"(
// Staggered/shuffled group indices to avoid partition camping (AMD GPUs). The formulas are taken from:
// http://docs.nvidia.com/cuda/samples/6_Advanced/transpose/doc/MatrixTranspose.pdf
// More details: https://github.com/CNugteren/CLBlast/issues/53
-#if USE_STAGGERED_INDICES == 1
+#if USE_STAGGERED_INDICES == 1 && GEMMK == 0
// Returns a single flattened work-group index built from the 2D group IDs:
// the group ID in dimension 0 plus the group ID in dimension 1 scaled by the
// number of groups in dimension 0.
INLINE_FUNC int GetGroupIDFlat() {
  const size_t groups_in_dim0 = get_num_groups(0);
  const size_t offset_from_dim1 = groups_in_dim0 * get_group_id(1);
  return get_group_id(0) + offset_from_dim1;
}