From 7e176ccac9779bd9929543127108593e0fd3b429 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Tue, 16 Jun 2015 08:42:52 +0200 Subject: Added support for conjugate transpose in GEMV --- src/routines/xgemv.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'src/routines') diff --git a/src/routines/xgemv.cc b/src/routines/xgemv.cc index 9f3908f8..78071c17 100644 --- a/src/routines/xgemv.cc +++ b/src/routines/xgemv.cc @@ -54,13 +54,16 @@ StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, auto a_two = (a_altlayout) ? m : n; // Swap m and n if the matrix is transposed - auto a_transposed = (a_transpose == Transpose::kYes); + auto a_transposed = (a_transpose != Transpose::kNo); auto m_real = (a_transposed) ? n : m; auto n_real = (a_transposed) ? m : n; // Determines whether the kernel needs to perform rotated access ('^' is the XOR operator) auto a_rotated = a_transposed ^ a_altlayout; + // In case of complex data-types, the transpose can also become a conjugate transpose + auto a_conjugate = (a_transpose == Transpose::kConjugate); + // Tests the matrix and the vectors for validity auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); if (ErrorIn(status)) { return status; } @@ -70,11 +73,11 @@ StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used - bool use_fast_kernel = (a_offset == 0) && (a_rotated == 0) && + bool use_fast_kernel = (a_offset == 0) && (a_rotated == 0) && (a_conjugate == 0) && IsMultiple(m, db_["WGS2"]*db_["WPT2"]) && IsMultiple(n, db_["WGS2"]) && IsMultiple(a_ld, db_["VW2"]); - bool use_fast_kernel_rot = (a_offset == 0) && (a_rotated == 1) && + bool use_fast_kernel_rot = (a_offset == 0) && (a_rotated == 1) && (a_conjugate == 0) && IsMultiple(m, db_["WGS3"]*db_["WPT3"]) && IsMultiple(n, db_["WGS3"]) && IsMultiple(a_ld, db_["VW3"]); @@ -115,6 +118,7 @@ StatusCode 
Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, kernel.SetArgument(11, y_buffer()); kernel.SetArgument(12, static_cast<int>(y_offset)); kernel.SetArgument(13, static_cast<int>(y_inc)); + kernel.SetArgument(14, static_cast<int>(a_conjugate)); // Launches the kernel auto global = std::vector<size_t>{global_size}; -- cgit v1.2.3