summary | refs | log | tree | commit | diff
path: root/src/routines/level3/xgemm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level3/xgemm.cpp')
-rw-r--r-- src/routines/level3/xgemm.cpp | 13
1 files changed, 7 insertions, 6 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 90e43fe4..2fb9f1fd 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -22,7 +22,8 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm", "XgemmDirect"},
+ PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -299,13 +300,13 @@ StatusCode Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
kernel.SetArgument(18, static_cast<int>(b_conjugate));
// Computes the global and local thread sizes
- const auto m_ceiled = Ceil(m, db_["MWG"]);
- const auto n_ceiled = Ceil(n, db_["NWG"]);
+ const auto m_ceiled = Ceil(m, db_["MWGD"]);
+ const auto n_ceiled = Ceil(n, db_["NWGD"]);
const auto global = std::vector<size_t>{
- (m_ceiled * db_["MDIMC"]) / db_["MWG"],
- (n_ceiled * db_["NDIMC"]) / db_["NWG"]
+ (m_ceiled * db_["MDIMCD"]) / db_["MWGD"],
+ (n_ceiled * db_["NDIMCD"]) / db_["NWGD"]
};
- const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+ const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]};
// Launches the kernel
auto status = RunKernel(kernel, queue_, device_, global, local, event_);