summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-07-27 23:08:49 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-07-27 23:08:49 +0200
commit 03bed8633eade7b22e72389b36e2f63ad8f3809d (patch)
tree 32aeb65eb6cfc086d487614fb341ce46918e85aa /src/kernels
parent 6a8b9e24f2428c140dac97d8279cbb99d051c59d (diff)
Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 35ec735c..90de0b3b 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -91,8 +91,8 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
#if GEMMK == 1
const __global real* restrict a_ptr = (const __global real* restrict) &agm[0];
const __global real* restrict b_ptr = (const __global real* restrict) &bgm[0];
- const int tid_x = get_global_id(0);
- const int tid_y = get_global_id(1);
+ const int tid_x = get_local_id(0) + MDIMC * GetGroupID0();
+ const int tid_y = get_local_id(1) + NDIMC * GetGroupID1();
#endif
// Combined thread identifier (volatile to disable caching)