From c0e41b87cb772d43ab8bf35d650d7a98037f155d Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 30 Nov 2018 20:23:26 +0100 Subject: Fixed an issue for unequal MWG and NWG and the new GEMMK == 1 kernel --- src/routines/level3/xgemm.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/routines/level3') diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index cb24460a..6daa0fcf 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -216,9 +216,11 @@ void Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k, kernel.SetArgument(9, static_cast<int>(c_temp_offset / db_["VWM"])); // Computes the global and local thread sizes + const auto global_divider_one = c_want_rotated_(db_["GEMMK"]) ? db_["NWG"] : db_["MWG"]; + const auto global_divider_two = c_want_rotated_(db_["GEMMK"]) ? db_["MWG"] : db_["NWG"]; const auto global = std::vector<size_t>{ - (c_one_i * db_["MDIMC"]) / db_["MWG"], - (c_two_i * db_["NDIMC"]) / db_["NWG"] + (c_one_i * db_["MDIMC"]) / global_divider_one, + (c_two_i * db_["NDIMC"]) / global_divider_two }; const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]}; -- cgit v1.2.3