summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-10-22 10:41:02 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-10-22 10:41:02 +0200
commit db17b1fbe9267ccd3ad8a7c419689d7b7b98a763 (patch)
tree a277d044f361596a0febff3cdaa7101b982aac64 /src
parent 597974b40d52d9cd199e5189d6d55cd12c841e63 (diff)
Fixed a bug in the SYRK/SYR2K/HERK/HER2K routines that would occur with specific tuning parameters
Diffstat (limited to 'src')
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl 2
-rw-r--r-- src/routines/level3/xher2k.cpp 2
-rw-r--r-- src/routines/level3/xherk.cpp 2
-rw-r--r-- src/routines/level3/xsyr2k.cpp 2
-rw-r--r-- src/routines/level3/xsyrk.cpp 2
5 files changed, 5 insertions, 5 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index a5faef5a..3b26e943 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -153,7 +153,7 @@ void XgemmLower(const int kSizeN, const int kSizeK,
const real beta = GetRealArg(arg_beta);
// Skip these threads if they do not contain threads contributing to the lower-triangle
- if (GetGroupID1()*NWG > GetGroupID0()*MWG) {
+ if (GetGroupID1()*NWG > (GetGroupID0() + 1)*MWG) {
return;
}
diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp
index ba770065..bf328729 100644
--- a/src/routines/level3/xher2k.cpp
+++ b/src/routines/level3/xher2k.cpp
@@ -79,7 +79,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
if (ErrorIn(status)) { return status; }
// Calculates the ceiled versions of n and k
- auto n_ceiled = Ceil(n, db_["NWG"]);
+ auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
auto k_ceiled = Ceil(k, db_["KWG"]);
// Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp
index 3063f3bc..77422526 100644
--- a/src/routines/level3/xherk.cpp
+++ b/src/routines/level3/xherk.cpp
@@ -76,7 +76,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
if (ErrorIn(status)) { return status; }
// Calculates the ceiled versions of n and k
- auto n_ceiled = Ceil(n, db_["NWG"]);
+ auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
auto k_ceiled = Ceil(k, db_["KWG"]);
// Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index 158cd9e5..badf3100 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -75,7 +75,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
if (ErrorIn(status)) { return status; }
// Calculates the ceiled versions of n and k
- auto n_ceiled = Ceil(n, db_["NWG"]);
+ auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
auto k_ceiled = Ceil(k, db_["KWG"]);
// Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp
index e1a72ef6..438aa218 100644
--- a/src/routines/level3/xsyrk.cpp
+++ b/src/routines/level3/xsyrk.cpp
@@ -71,7 +71,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
if (ErrorIn(status)) { return status; }
// Calculates the ceiled versions of n and k
- auto n_ceiled = Ceil(n, db_["NWG"]);
+ auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
auto k_ceiled = Ceil(k, db_["KWG"]);
// Decides which kernel to run: the upper-triangular or lower-triangular version