diff options
Diffstat (limited to 'src/routines/level3/xgemm.cpp')
-rw-r--r-- | src/routines/level3/xgemm.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 97e8db7e..0db28537 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -127,7 +127,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
   // case nothing has to be done, these kernels can be skipped.
   if (!a_no_temp) {
     auto eventProcessA = Event();
-    status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessA.pointer(), emptyEventList,
+    status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
                                     a_one, a_two, a_ld, a_offset, a_buffer,
                                     m_ceiled, k_ceiled, m_ceiled, 0, a_temp,
                                     ConstantOne<T>(), program,
@@ -139,7 +139,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
   // As above, but now for matrix B
   if (!b_no_temp) {
     auto eventProcessB = Event();
-    status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessB.pointer(), emptyEventList,
+    status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
                                     b_one, b_two, b_ld, b_offset, b_buffer,
                                     n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
                                     ConstantOne<T>(), program,
@@ -151,7 +151,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
   // As above, but now for matrix C. This is only necessary if C is used both as input and output.
   if (!c_no_temp && beta != static_cast<T>(0)) {
     auto eventProcessC = Event();
-    status = PadCopyTransposeMatrix(queue_, device_, context_, db_, eventProcessC.pointer(), emptyEventList,
+    status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
                                     c_one, c_two, c_ld, c_offset, c_buffer,
                                     m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
                                     ConstantOne<T>(), program,
@@ -190,7 +190,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
   // Runs the post-processing kernel if needed
   if (!c_no_temp) {
     eventWaitList.push_back(eventKernel);
-    status = PadCopyTransposeMatrix(queue_, device_, context_, db_, event_, eventWaitList,
+    status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
                                     m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
                                     c_one, c_two, c_ld, c_offset, c_buffer,
                                     ConstantOne<T>(), program,