From 6b533dda1ce8b4feda68708dec779ddc6200480c Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sun, 18 Dec 2016 11:54:32 +0100
Subject: Fixed a bug when using offsets in the direct GEMM kernels

---
 src/kernels/level3/xgemm_direct_part3.opencl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/kernels/level3/xgemm_direct_part3.opencl')

diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl
index a9350e00..c04cdeb8 100644
--- a/src/kernels/level3/xgemm_direct_part3.opencl
+++ b/src/kernels/level3/xgemm_direct_part3.opencl
@@ -53,13 +53,13 @@ inline void XgemmDirect(const int kSizeM, const int kSizeN, const int kSizeK,
   for (; kwg < (kSizeK/WGD) * WGD; kwg+=WGD) {
 
     // Loads data: off-chip --> local (matrix A and B)
-    if (a_ld % VWMD == 0) {
+    if (a_ld % VWMD == 0 && a_offset % VWMD == 0) {
       GlobalToLocalDirectA(agm, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
     }
     else {
       GlobalToLocalScalarA(agms, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
     }
-    if (b_ld % VWND == 0) {
+    if (b_ld % VWND == 0 && b_offset % VWND == 0) {
       GlobalToLocalDirectB(bgm, blm, b_ld, b_offset, kwg, b_transpose, b_conjugate);
     }
     else {
-- 
cgit v1.2.3