diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-22 10:41:02 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-22 10:41:02 +0200 |
commit | db17b1fbe9267ccd3ad8a7c419689d7b7b98a763 (patch) | |
tree | a277d044f361596a0febff3cdaa7101b982aac64 /src | |
parent | 597974b40d52d9cd199e5189d6d55cd12c841e63 (diff) |
Fixed a bug in the SYRK/SYR2K/HERK/HER2K routines that would occur with specific tuning parameters
Diffstat (limited to 'src')
-rw-r--r-- | src/kernels/level3/xgemm_part3.opencl | 2 | ||||
-rw-r--r-- | src/routines/level3/xher2k.cpp | 2 | ||||
-rw-r--r-- | src/routines/level3/xherk.cpp | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cpp | 2 | ||||
-rw-r--r-- | src/routines/level3/xsyrk.cpp | 2 |
5 files changed, 5 insertions, 5 deletions
```diff
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index a5faef5a..3b26e943 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -153,7 +153,7 @@ void XgemmLower(const int kSizeN, const int kSizeK,
   const real beta = GetRealArg(arg_beta);
 
   // Skip these threads if they do not contain threads contributing to the lower-triangle
-  if (GetGroupID1()*NWG > GetGroupID0()*MWG) {
+  if (GetGroupID1()*NWG > (GetGroupID0() + 1)*MWG) {
     return;
   }
 
diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp
index ba770065..bf328729 100644
--- a/src/routines/level3/xher2k.cpp
+++ b/src/routines/level3/xher2k.cpp
@@ -79,7 +79,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
   if (ErrorIn(status)) { return status; }
 
   // Calculates the ceiled versions of n and k
-  auto n_ceiled = Ceil(n, db_["NWG"]);
+  auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
   auto k_ceiled = Ceil(k, db_["KWG"]);
 
   // Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp
index 3063f3bc..77422526 100644
--- a/src/routines/level3/xherk.cpp
+++ b/src/routines/level3/xherk.cpp
@@ -76,7 +76,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
   if (ErrorIn(status)) { return status; }
 
   // Calculates the ceiled versions of n and k
-  auto n_ceiled = Ceil(n, db_["NWG"]);
+  auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
   auto k_ceiled = Ceil(k, db_["KWG"]);
 
   // Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index 158cd9e5..badf3100 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -75,7 +75,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
   if (ErrorIn(status)) { return status; }
 
   // Calculates the ceiled versions of n and k
-  auto n_ceiled = Ceil(n, db_["NWG"]);
+  auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
   auto k_ceiled = Ceil(k, db_["KWG"]);
 
   // Decides which kernel to run: the upper-triangular or lower-triangular version
diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp
index e1a72ef6..438aa218 100644
--- a/src/routines/level3/xsyrk.cpp
+++ b/src/routines/level3/xsyrk.cpp
@@ -71,7 +71,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
   if (ErrorIn(status)) { return status; }
 
   // Calculates the ceiled versions of n and k
-  auto n_ceiled = Ceil(n, db_["NWG"]);
+  auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
   auto k_ceiled = Ceil(k, db_["KWG"]);
 
   // Decides which kernel to run: the upper-triangular or lower-triangular version
```