From a6ba6470aa45dff3c224da9644b98d49b0cce199 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sat, 4 Feb 2017 14:25:27 +0100
Subject: Added row-major support for TRSV

---
 src/routines/level2/xtrsv.cpp | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

(limited to 'src')

diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index 93f4174f..4725b1c1 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -49,8 +49,8 @@ void Xtrsv::Substitution(const Layout layout, const Triangle triangle,
                            (a_transpose != Transpose::kNo && layout != Layout::kColMajor)) ? 0 : 1;

   // The data is either in the upper or lower triangle
-  auto is_upper = ((triangle == Triangle::kUpper && a_transpose == Transpose::kNo) ||
-                   (triangle == Triangle::kLower && a_transpose != Transpose::kNo));
+  const auto is_upper = ((triangle == Triangle::kUpper && a_transpose == Transpose::kNo) ||
+                         (triangle == Triangle::kLower && a_transpose != Transpose::kNo));

   // Retrieves the kernel from the compiled binary
   const auto kernel_name = (is_upper) ? "trsv_backward" : "trsv_forward";
@@ -113,22 +113,28 @@ void Xtrsv::DoTrsv(const Layout layout, const Triangle triangle,
              n, x_inc, x_offset, x_buffer, ConstantZero());
   fill_vector_event.WaitForCompletion();

-  // TODO: Not working for row-major at the moment
-  const auto is_transposed = ((a_transpose == Transpose::kNo && layout == Layout::kRowMajor) ||
-                              (a_transpose != Transpose::kNo && layout != Layout::kRowMajor));
+  // The data is either in the upper or lower triangle
+  const auto is_upper = ((triangle == Triangle::kUpper && a_transpose == Transpose::kNo) ||
+                         (triangle == Triangle::kLower && a_transpose != Transpose::kNo));

   // Loops over the blocks
   auto col = n; // the initial column position
   for (auto i = size_t{0}; i < n; i += TRSV_BLOCK_SIZE) {
     const auto block_size = std::min(TRSV_BLOCK_SIZE, n - i);

-    if (!is_transposed) {
+    // Sets the next column position
+    col = (is_upper) ? col - block_size : i;
+
+    // Sets the offsets for upper or lower triangular
+    const auto extra_offset_x = (is_upper) ? (col+block_size)*x_inc : 0;
+    const auto extra_offset_b = col*x_inc;
+
+    if (a_transpose == Transpose::kNo) {

       // Sets the offsets for upper or lower triangular
-      col = (triangle == Triangle::kUpper) ? col - block_size : i;
-      const auto extra_offset_a = (triangle == Triangle::kUpper) ? col + (col+block_size)*a_ld : col;
-      const auto extra_offset_x = (triangle == Triangle::kUpper) ? (col+block_size)*x_inc : 0;
-      const auto extra_offset_b = col*x_inc;
+      const auto extra_offset_a = (layout == Layout::kColMajor) ?
+                                  ((triangle == Triangle::kUpper) ? col + (col+block_size)*a_ld : col) :
+                                  ((triangle == Triangle::kUpper) ? col+block_size + (col)*a_ld : col*a_ld);

       // Runs the GEMV routine to compute x' = A * x
       if (i > 0) {
@@ -141,10 +147,9 @@ void Xtrsv::DoTrsv(const Layout layout, const Triangle triangle,
     else {

       // Sets the offsets for upper or lower triangular
-      col = (triangle == Triangle::kLower) ? col - block_size : i;
-      const auto extra_offset_a = (triangle == Triangle::kLower) ? col+block_size + col*a_ld : col*a_ld;
-      const auto extra_offset_x = (triangle == Triangle::kLower) ? (col+block_size)*x_inc : 0;
-      const auto extra_offset_b = col*x_inc;
+      const auto extra_offset_a = (layout == Layout::kColMajor) ?
+                                  ((triangle == Triangle::kLower) ? col+block_size + col*a_ld : col*a_ld) :
+                                  ((triangle == Triangle::kLower) ? col + (col+block_size)*a_ld : col);

       // Runs the GEMV routine to compute x' = A * x
       if (i > 0) {
--
cgit v1.2.3