diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-23 08:09:07 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-23 08:09:07 +0200 |
commit | 20eb3506d63e21725974e16ae392cf0dd4bf4df5 (patch) | |
tree | 8742994285f53f94f727b1853404325103104393 /src/routines | |
parent | 4c2a166bc5406b194108d3b31238e55ac6b99e3c (diff) |
Added a condition to update only lower/upper triangular parts in the un-pad kernels
Diffstat (limited to 'src/routines')
-rw-r--r-- | src/routines/xgemm.cc | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
diff --git a/src/routines/xgemm.cc b/src/routines/xgemm.cc
index 20cd2675..651ebb55 100644
--- a/src/routines/xgemm.cc
+++ b/src/routines/xgemm.cc
@@ -108,18 +108,18 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
 // them up until they reach a certain multiple of size (kernel parameter dependent).
 status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer,
 m_ceiled, k_ceiled, m_ceiled, 0, temp_a,
- a_do_transpose, a_conjugate, true, program);
+ a_do_transpose, a_conjugate, true, false, false, program);
 if (ErrorIn(status)) { return status; }
 status = PadCopyTransposeMatrix(b_one, b_two, b_ld, b_offset, b_buffer,
 n_ceiled, k_ceiled, n_ceiled, 0, temp_b,
- b_do_transpose, b_conjugate, true, program);
+ b_do_transpose, b_conjugate, true, false, false, program);
 if (ErrorIn(status)) { return status; }
 
 // Only necessary for matrix C if it used both as input and output
 if (beta != static_cast<T>(0)) {
 status = PadCopyTransposeMatrix(c_one, c_two, c_ld, c_offset, c_buffer,
 m_ceiled, n_ceiled, m_ceiled, 0, temp_c,
- c_do_transpose, false, true, program);
+ c_do_transpose, false, true, false, false, program);
 if (ErrorIn(status)) { return status; }
 }
 
@@ -151,7 +151,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
 // Runs the post-processing kernel
 status = PadCopyTransposeMatrix(m_ceiled, n_ceiled, m_ceiled, 0, temp_c,
 c_one, c_two, c_ld, c_offset, c_buffer,
- c_do_transpose, false, false, program);
+ c_do_transpose, false, false, false, false, program);
 if (ErrorIn(status)) { return status; }
 
 // Successfully finished the computation
```