From 03bed8633eade7b22e72389b36e2f63ad8f3809d Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Fri, 27 Jul 2018 23:08:49 +0200
Subject: Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel

---
 src/kernels/level3/xgemm_part3.opencl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/kernels')

diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 35ec735c..90de0b3b 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -91,8 +91,8 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
   #if GEMMK == 1
     const __global real* restrict a_ptr = (const __global real* restrict) &agm[0];
     const __global real* restrict b_ptr = (const __global real* restrict) &bgm[0];
-    const int tid_x = get_global_id(0);
-    const int tid_y = get_global_id(1);
+    const int tid_x = get_local_id(0) + MDIMC * GetGroupID0();
+    const int tid_y = get_local_id(1) + NDIMC * GetGroupID1();
   #endif
 
   // Combined thread identifier (volatile to disable caching)
-- 
cgit v1.2.3