diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-07-27 23:08:49 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-07-27 23:08:49 +0200 |
commit | 03bed8633eade7b22e72389b36e2f63ad8f3809d (patch) | |
tree | 32aeb65eb6cfc086d487614fb341ce46918e85aa /src/kernels | |
parent | 6a8b9e24f2428c140dac97d8279cbb99d051c59d (diff) |
Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level3/xgemm_part3.opencl | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 35ec735c..90de0b3b 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -91,8 +91,8 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
   #if GEMMK == 1
     const __global real* restrict a_ptr = (const __global real* restrict) &agm[0];
     const __global real* restrict b_ptr = (const __global real* restrict) &bgm[0];
-    const int tid_x = get_global_id(0);
-    const int tid_y = get_global_id(1);
+    const int tid_x = get_local_id(0) + MDIMC * GetGroupID0();
+    const int tid_y = get_local_id(1) + NDIMC * GetGroupID1();
   #endif
 
   // Combined thread identifier (volatile to disable caching)