summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-06-30 21:57:41 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-06-30 21:57:41 +0200
commit: 4cf516cfec7866776c822af6e41741699102ba26 (patch)
tree: 8cb074b0673ecf7fa91f18c5d80f5908dad28289 /src
parent: 52881f386450a3cdd482c81b6f0bf8565a40a860 (diff)
Fixed an if-statement in the direct GEMM kernel causing a bug with specific sets of input parameters
Diffstat (limited to 'src')
-rw-r--r-- src/kernels/level3/xgemm_direct_part2.opencl | 6
1 file changed, 4 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part2.opencl b/src/kernels/level3/xgemm_direct_part2.opencl
index fc09307e..3af14bff 100644
--- a/src/kernels/level3/xgemm_direct_part2.opencl
+++ b/src/kernels/level3/xgemm_direct_part2.opencl
@@ -255,7 +255,8 @@ inline void GlobalToLocalCheckedA(const __global real* restrict agms, __local re
int idk = (a_transpose) ? kg + GetGroupID0()*WGD : kg + kwg;
// Loads the data from global memory into the local memory
- int condition = (a_transpose) ? idm < kSizeK : idm < kSizeM;
+ int condition = (a_transpose) ? (idm < kSizeK) && (idk < kSizeM) :
+ (idm < kSizeM) && (idk < kSizeK);
if (condition) {
real result = agms[idk*a_ld + idm + a_offset];
if (a_conjugate) { COMPLEX_CONJUGATE(result); }
@@ -293,7 +294,8 @@ inline void GlobalToLocalCheckedB(const __global real* restrict bgms, __local re
int idk = (b_transpose) ? kg + GetGroupID1()*WGD : kg + kwg;
// Loads the data from global memory into the local memory
- int condition = (b_transpose) ? idn < kSizeK : idn < kSizeN;
+ int condition = (b_transpose) ? (idn < kSizeK) && (idk < kSizeN) :
+ (idn < kSizeN) && (idk < kSizeK);
if (condition) {
real result = bgms[idk*b_ld + idn + b_offset];
if (b_conjugate) { COMPLEX_CONJUGATE(result); }