summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-07-31 21:46:47 +0200
committer: GitHub <noreply@github.com> 2017-07-31 21:46:47 +0200
commit: 27ab3deee7e244a855a1633c8e04bc9375736b81 (patch)
tree: 308dab66b01229dd420ef3e9d7b1f5e462b7706f
parent: 1155c068e982b1af19230c4c2d2e6dcb1d495414 (diff)
parent: 99afdcd9080e4b2e8b26d4bd7324a92714c71e89 (diff)
Merge pull request #180 from mcian/restore_direct_GEMM
Restore direct gemm
-rw-r--r-- src/routines/level3/xgemm.cpp 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index f4611aba..136eec43 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -280,11 +280,13 @@ void Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
kernel.SetArgument(16, static_cast<int>(b_conjugate));
// Computes the global and local thread sizes
- //const auto m_ceiled = Ceil(m, db_["WGD"]);
- //const auto n_ceiled = Ceil(n, db_["WGD"]);
+ const auto m_ceiled = Ceil(m, db_["WGD"]);
+ const auto n_ceiled = Ceil(n, db_["WGD"]);
const auto global = std::vector<size_t>{
- CeilDiv(m * db_["MDIMCD"], db_["WGD"]),
- CeilDiv(n * db_["NDIMCD"], db_["WGD"])
+ // CeilDiv(m * db_["MDIMCD"], db_["WGD"]),
+ // CeilDiv(n * db_["NDIMCD"], db_["WGD"])
+ (m_ceiled * db_["MDIMCD"]) / db_["WGD"],
+ (n_ceiled * db_["NDIMCD"]) / db_["WGD"]
};
const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]};