diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-12-18 11:54:32 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-12-18 11:54:32 +0100 |
commit | 6b533dda1ce8b4feda68708dec779ddc6200480c (patch) | |
tree | 54a041e38a35b567f59c15bc44afaae809747186 /src/kernels | |
parent | 26e017743191e188bc4ae7c7148a6025cfd74422 (diff) |
Fixed a bug when using offsets in the direct GEMM kernels
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level3/xgemm_direct_part2.opencl | 4 | ++-- |
-rw-r--r-- | src/kernels/level3/xgemm_direct_part3.opencl | 4 | ++-- |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/src/kernels/level3/xgemm_direct_part2.opencl b/src/kernels/level3/xgemm_direct_part2.opencl
index d77cbf65..fc09307e 100644
--- a/src/kernels/level3/xgemm_direct_part2.opencl
+++ b/src/kernels/level3/xgemm_direct_part2.opencl
@@ -42,7 +42,7 @@ inline void GlobalToLocalDirectA(const __global realMD* restrict agm, __local re
 int idk = (a_transpose) ? kg + GetGroupID0()*WGD : kg + kwg;
 
 // Loads the data from global memory into the local memory
-const realMD avec = agm[idk*(a_ld/VWMD) + idm + a_offset];
+const realMD avec = agm[idk*(a_ld/VWMD) + idm + (a_offset/VWMD)];
 #if VWMD == 1
   alm[kg*(WGD + PADA) + mg] = avec;
 #elif VWMD == 2
@@ -113,7 +113,7 @@ inline void GlobalToLocalDirectB(const __global realND* restrict bgm, __local re
 int idk = (b_transpose) ? kg + GetGroupID1()*WGD : kg + kwg;
 
 // Loads the data from global memory into the local memory
-const realND bvec = bgm[idk*(b_ld/VWND) + idn + b_offset];
+const realND bvec = bgm[idk*(b_ld/VWND) + idn + (b_offset/VWND)];
 #if VWND == 1
   blm[kg*(WGD + PADB) + ng] = bvec;
 #elif VWND == 2
diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl
index a9350e00..c04cdeb8 100644
--- a/src/kernels/level3/xgemm_direct_part3.opencl
+++ b/src/kernels/level3/xgemm_direct_part3.opencl
@@ -53,13 +53,13 @@ inline void XgemmDirect(const int kSizeM, const int kSizeN, const int kSizeK,
 for (; kwg < (kSizeK/WGD) * WGD; kwg+=WGD) {
 
   // Loads data: off-chip --> local (matrix A and B)
-  if (a_ld % VWMD == 0) {
+  if (a_ld % VWMD == 0 && a_offset % VWMD == 0) {
     GlobalToLocalDirectA(agm, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
   }
   else {
     GlobalToLocalScalarA(agms, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
   }
-  if (b_ld % VWND == 0) {
+  if (b_ld % VWND == 0 && b_offset % VWND == 0) {
     GlobalToLocalDirectB(bgm, blm, b_ld, b_offset, kwg, b_transpose, b_conjugate);
   }
   else {
```