summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_direct_part3.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-12-18 11:54:32 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-12-18 11:54:32 +0100
commit: 6b533dda1ce8b4feda68708dec779ddc6200480c (patch)
tree: 54a041e38a35b567f59c15bc44afaae809747186 /src/kernels/level3/xgemm_direct_part3.opencl
parent: 26e017743191e188bc4ae7c7148a6025cfd74422 (diff)
Fixed a bug when using offsets in the direct GEMM kernels
Diffstat (limited to 'src/kernels/level3/xgemm_direct_part3.opencl')
-rw-r--r-- src/kernels/level3/xgemm_direct_part3.opencl | 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl
index a9350e00..c04cdeb8 100644
--- a/src/kernels/level3/xgemm_direct_part3.opencl
+++ b/src/kernels/level3/xgemm_direct_part3.opencl
@@ -53,13 +53,13 @@ inline void XgemmDirect(const int kSizeM, const int kSizeN, const int kSizeK,
for (; kwg < (kSizeK/WGD) * WGD; kwg+=WGD) {
// Loads data: off-chip --> local (matrix A and B)
- if (a_ld % VWMD == 0) {
+ if (a_ld % VWMD == 0 && a_offset % VWMD == 0) {
GlobalToLocalDirectA(agm, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
}
else {
GlobalToLocalScalarA(agms, alm, a_ld, a_offset, kwg, a_transpose, a_conjugate);
}
- if (b_ld % VWND == 0) {
+ if (b_ld % VWND == 0 && b_offset % VWND == 0) {
GlobalToLocalDirectB(bgm, blm, b_ld, b_offset, kwg, b_transpose, b_conjugate);
}
else {