From 8f01c644b5c62958c1dcd4fd72b411f3805b81a6 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Tue, 16 Jun 2015 07:43:19 +0200 Subject: Added support for complex conjugate transpose --- src/routines/xgemm.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/routines') diff --git a/src/routines/xgemm.cc b/src/routines/xgemm.cc index 16bbc154..db10899c 100644 --- a/src/routines/xgemm.cc +++ b/src/routines/xgemm.cc @@ -63,6 +63,10 @@ StatusCode Xgemm::DoGemm(const Layout layout, auto b_do_transpose = !b_rotated; auto c_do_transpose = c_rotated; + // In case of complex data-types, the transpose can also become a conjugate transpose + auto a_conjugate = (a_transpose == Transpose::kConjugate); + auto b_conjugate = (b_transpose == Transpose::kConjugate); + // Computes the first and second dimensions of the 3 matrices taking into account whether the // matrices are rotated or not auto a_one = (a_rotated) ? k : m; @@ -104,18 +108,18 @@ StatusCode Xgemm::DoGemm(const Layout layout, // them up until they reach a certain multiple of size (kernel parameter dependent). status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer, m_ceiled, k_ceiled, m_ceiled, 0, temp_a, - a_do_transpose, true, program); + a_do_transpose, a_conjugate, true, program); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(b_one, b_two, b_ld, b_offset, b_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b, - b_do_transpose, true, program); + b_do_transpose, b_conjugate, true, program); if (ErrorIn(status)) { return status; } // Only necessary for matrix C if it used both as input and output if (beta != static_cast(0)) { status = PadCopyTransposeMatrix(c_one, c_two, c_ld, c_offset, c_buffer, m_ceiled, n_ceiled, m_ceiled, 0, temp_c, - c_do_transpose, true, program); + c_do_transpose, false, true, program); if (ErrorIn(status)) { return status; } } @@ -147,7 +151,7 @@ StatusCode Xgemm::DoGemm(const Layout layout, // Runs the post-processing kernel status = PadCopyTransposeMatrix(m_ceiled, n_ceiled, m_ceiled, 0, temp_c, c_one, c_two, c_ld, c_offset, c_buffer, - c_do_transpose, false, program); + c_do_transpose, false, false, program); if (ErrorIn(status)) { return status; } // Successfully finished the computation -- cgit v1.2.3