summary | refs | log | tree | commit | diff
path: root/src/routines/level3
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-11-30 20:23:26 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-11-30 20:23:26 +0100
commit: c0e41b87cb772d43ab8bf35d650d7a98037f155d (patch)
tree: 3140ec86cefbfe0ade6cb5a3fc02afebe86780d4 /src/routines/level3
parent: bca1506e870ccc256e7dcf8aaf19f900652e91ba (diff)
Fixed an issue for unequal MWG and NWG and the new GEMMK == 1 kernel
Diffstat (limited to 'src/routines/level3')
-rw-r--r-- src/routines/level3/xgemm.cpp | 6
1 file changed, 4 insertions, 2 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index cb24460a..6daa0fcf 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -216,9 +216,11 @@ void Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k,
kernel.SetArgument(9, static_cast<int>(c_temp_offset / db_["VWM"]));
// Computes the global and local thread sizes
+ const auto global_divider_one = c_want_rotated_(db_["GEMMK"]) ? db_["NWG"] : db_["MWG"];
+ const auto global_divider_two = c_want_rotated_(db_["GEMMK"]) ? db_["MWG"] : db_["NWG"];
const auto global = std::vector<size_t>{
- (c_one_i * db_["MDIMC"]) / db_["MWG"],
- (c_two_i * db_["NDIMC"]) / db_["NWG"]
+ (c_one_i * db_["MDIMC"]) / global_divider_one,
+ (c_two_i * db_["NDIMC"]) / global_divider_two
};
const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};