summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-07-25 20:53:12 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-07-25 20:53:12 +0200
commit: 0ea16a0e635ebc9699bc83517b95f33547fadd3e (patch)
tree: 7adb8c8d48121bcf7690aa68b3e028929d81778e /src/routines
parent: b7473f50dfb96f0a4a15ce7d05102ef2d810a7de (diff)
Minor optimization for the direct GEMM kernel: don't ceil m and n unnecessarily high
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/level3/xgemm.cpp | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 30e5999c..f4611aba 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -280,11 +280,11 @@ void Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
kernel.SetArgument(16, static_cast<int>(b_conjugate));
// Computes the global and local thread sizes
- const auto m_ceiled = Ceil(m, db_["WGD"]);
- const auto n_ceiled = Ceil(n, db_["WGD"]);
+ //const auto m_ceiled = Ceil(m, db_["WGD"]);
+ //const auto n_ceiled = Ceil(n, db_["WGD"]);
const auto global = std::vector<size_t>{
- (m_ceiled * db_["MDIMCD"]) / db_["WGD"],
- (n_ceiled * db_["NDIMCD"]) / db_["WGD"]
+ CeilDiv(m * db_["MDIMCD"], db_["WGD"]),
+ CeilDiv(n * db_["NDIMCD"], db_["WGD"])
};
const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]};