From 4cf516cfec7866776c822af6e41741699102ba26 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Fri, 30 Jun 2017 21:57:41 +0200
Subject: Fixed an if-statement in the direct GEMM kernel causing a bug with
 specific sets of input parameters

---
 src/kernels/level3/xgemm_direct_part2.opencl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/kernels')

diff --git a/src/kernels/level3/xgemm_direct_part2.opencl b/src/kernels/level3/xgemm_direct_part2.opencl
index fc09307e..3af14bff 100644
--- a/src/kernels/level3/xgemm_direct_part2.opencl
+++ b/src/kernels/level3/xgemm_direct_part2.opencl
@@ -255,7 +255,8 @@ inline void GlobalToLocalCheckedA(const __global real* restrict agms, __local re
       int idk = (a_transpose) ? kg + GetGroupID0()*WGD : kg + kwg;
 
       // Loads the data from global memory into the local memory
-      int condition = (a_transpose) ? idm < kSizeK : idm < kSizeM;
+      int condition = (a_transpose) ? (idm < kSizeK) && (idk < kSizeM) :
+                                      (idm < kSizeM) && (idk < kSizeK);
       if (condition) {
         real result = agms[idk*a_ld + idm + a_offset];
         if (a_conjugate) { COMPLEX_CONJUGATE(result); }
@@ -293,7 +294,8 @@ inline void GlobalToLocalCheckedB(const __global real* restrict bgms, __local re
       int idk = (b_transpose) ? kg + GetGroupID1()*WGD : kg + kwg;
 
       // Loads the data from global memory into the local memory
-      int condition = (b_transpose) ? idn < kSizeK : idn < kSizeN;
+      int condition = (b_transpose) ? (idn < kSizeK) && (idk < kSizeN) :
+                                      (idn < kSizeN) && (idk < kSizeK);
       if (condition) {
         real result = bgms[idk*b_ld + idn + b_offset];
         if (b_conjugate) { COMPLEX_CONJUGATE(result); }
-- 
cgit v1.2.3