diff options
Diffstat (limited to 'src/routines/level3/xgemm.cpp')
-rw-r--r-- | src/routines/level3/xgemm.cpp | 13 |
1 files changed, 7 insertions, 6 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index 90e43fe4..2fb9f1fd 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -22,7 +22,8 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) { + Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm", "XgemmDirect"}, + PrecisionValue<T>()) { source_string_ = #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -299,13 +300,13 @@ StatusCode Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k, kernel.SetArgument(18, static_cast<int>(b_conjugate)); // Computes the global and local thread sizes - const auto m_ceiled = Ceil(m, db_["MWG"]); - const auto n_ceiled = Ceil(n, db_["NWG"]); + const auto m_ceiled = Ceil(m, db_["MWGD"]); + const auto n_ceiled = Ceil(n, db_["NWGD"]); const auto global = std::vector<size_t>{ - (m_ceiled * db_["MDIMC"]) / db_["MWG"], - (n_ceiled * db_["NDIMC"]) / db_["NWG"] + (m_ceiled * db_["MDIMCD"]) / db_["MWGD"], + (n_ceiled * db_["NDIMCD"]) / db_["NWGD"] }; - const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]}; + const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]}; // Launches the kernel auto status = RunKernel(kernel, queue_, device_, global, local, event_); |