From 0dc85845f7e3c901d75ec51769f6828c52bfdfb5 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 13 Jul 2015 08:41:26 +0200 Subject: Updated interface of the PadCopyTransposeMatrix method --- src/routine.cc | 14 +++++++------- src/routines/level3/xgemm.cc | 8 ++++---- src/routines/level3/xher2k.cc | 12 ++++++------ src/routines/level3/xherk.cc | 8 ++++---- src/routines/level3/xsyr2k.cc | 8 ++++---- src/routines/level3/xsyrk.cc | 6 +++--- 6 files changed, 28 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/routine.cc b/src/routine.cc index d11edb0f..339027d4 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -202,17 +202,17 @@ StatusCode Routine::TestVectorY(const size_t n, const Buffer &buffer, const size // ================================================================================================= -// Copies a matrix and pads it with zeros +// Copies or transposes a matrix and pads/unpads it with zeros StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t src_two, const size_t src_ld, const size_t src_offset, const Buffer &src, const size_t dest_one, const size_t dest_two, const size_t dest_ld, const size_t dest_offset, const Buffer &dest, + const Program &program, const bool do_pad, const bool do_transpose, const bool do_conjugate, - const bool pad, const bool upper, const bool lower, - const bool diagonal_imag_zero, - const Program &program) { + const bool upper, const bool lower, + const bool diagonal_imag_zero) { // Determines whether or not the fast-version could potentially be used auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) && @@ -230,7 +230,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr } else { use_fast_kernel = false; - kernel_name = (pad) ? "PadTransposeMatrix" : "UnPadTransposeMatrix"; + kernel_name = (do_pad) ? 
"PadTransposeMatrix" : "UnPadTransposeMatrix"; } } else { @@ -242,7 +242,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr } else { use_fast_kernel = false; - kernel_name = (pad) ? "PadMatrix" : "UnPadMatrix"; + kernel_name = (do_pad) ? "PadMatrix" : "UnPadMatrix"; } } @@ -267,7 +267,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr kernel.SetArgument(7, static_cast<int>(dest_ld)); kernel.SetArgument(8, static_cast<int>(dest_offset)); kernel.SetArgument(9, dest()); - if (pad) { + if (do_pad) { kernel.SetArgument(10, static_cast<int>(do_conjugate)); } else { diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index f4a9f737..7a854741 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -108,18 +108,18 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, // them up until they reach a certain multiple of size (kernel parameter dependent). status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer, m_ceiled, k_ceiled, m_ceiled, 0, temp_a, - a_do_transpose, a_conjugate, true, false, false, false, program); + program, true, a_do_transpose, a_conjugate); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(b_one, b_two, b_ld, b_offset, b_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b, - b_do_transpose, b_conjugate, true, false, false, false, program); + program, true, b_do_transpose, b_conjugate); if (ErrorIn(status)) { return status; } // Only necessary for matrix C if it used both as input and output if (beta != static_cast<T>(0)) { status = PadCopyTransposeMatrix(c_one, c_two, c_ld, c_offset, c_buffer, m_ceiled, n_ceiled, m_ceiled, 0, temp_c, - c_do_transpose, false, true, false, false, false, program); + program, true, c_do_transpose, false); if (ErrorIn(status)) { return status; } } @@ -151,7 +151,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, // Runs the post-processing kernel status = PadCopyTransposeMatrix(m_ceiled, n_ceiled, 
m_ceiled, 0, temp_c, c_one, c_two, c_ld, c_offset, c_buffer, - c_do_transpose, false, false, false, false, false, program); + program, false, c_do_transpose, false); if (ErrorIn(status)) { return status; } // Successfully finished the computation diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 6d33a0e1..ec435d8e 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -96,25 +96,25 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co // fill them up until they reach a certain multiple of size (kernel parameter dependent). status = PadCopyTransposeMatrix(ab_one, ab_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_a1, - ab_rotated, ab_conjugate, true, false, false, false, program); + program, true, ab_rotated, ab_conjugate); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(ab_one, ab_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_a2, - ab_rotated, !ab_conjugate, true, false, false, false, program); + program, true, ab_rotated, !ab_conjugate); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(ab_one, ab_two, b_ld, b_offset, b_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b1, - ab_rotated, ab_conjugate, true, false, false, false, program); + program, true, ab_rotated, ab_conjugate); status = PadCopyTransposeMatrix(ab_one, ab_two, b_ld, b_offset, b_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b2, - ab_rotated, !ab_conjugate, true, false, false, false, program); + program, true, ab_rotated, !ab_conjugate); if (ErrorIn(status)) { return status; } // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to // modify the other triangle. 
status = PadCopyTransposeMatrix(n, n, c_ld, c_offset, c_buffer, n_ceiled, n_ceiled, n_ceiled, 0, temp_c, - c_rotated, false, true, false, false, false, program); + program, true, c_rotated, false); if (ErrorIn(status)) { return status; } // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary @@ -159,7 +159,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co auto lower = (triangle == Triangle::kLower); status = PadCopyTransposeMatrix(n_ceiled, n_ceiled, n_ceiled, 0, temp_c, n, n, c_ld, c_offset, c_buffer, - c_rotated, false, false, upper, lower, true, program); + program, false, c_rotated, false, upper, lower, true); if (ErrorIn(status)) { return status; } // Successfully finished the computation diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index 8fae294f..8ad64162 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -92,18 +92,18 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons // creates two copies: status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_a, - a_rotated, a_conjugate, true, false, false, false, program); + program, true, a_rotated, a_conjugate); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b, - a_rotated, b_conjugate, true, false, false, false, program); + program, true, a_rotated, b_conjugate); if (ErrorIn(status)) { return status; } // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to // modify the other triangle. 
status = PadCopyTransposeMatrix(n, n, c_ld, c_offset, c_buffer, n_ceiled, n_ceiled, n_ceiled, 0, temp_c, - c_rotated, false, true, false, false, false, program); + program, true, c_rotated, false); if (ErrorIn(status)) { return status; } // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary @@ -137,7 +137,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons auto lower = (triangle == Triangle::kLower); status = PadCopyTransposeMatrix(n_ceiled, n_ceiled, n_ceiled, 0, temp_c, n, n, c_ld, c_offset, c_buffer, - c_rotated, false, false, upper, lower, true, program); + program, false, c_rotated, false, upper, lower, true); if (ErrorIn(status)) { return status; } // Successfully finished the computation diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index d54f2fc1..651bc524 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -92,18 +92,18 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons // fill them up until they reach a certain multiple of size (kernel parameter dependent). status = PadCopyTransposeMatrix(ab_one, ab_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_a, - ab_rotated, false, true, false, false, false, program); + program, true, ab_rotated, false); if (ErrorIn(status)) { return status; } status = PadCopyTransposeMatrix(ab_one, ab_two, b_ld, b_offset, b_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_b, - ab_rotated, false, true, false, false, false, program); + program, true, ab_rotated, false); if (ErrorIn(status)) { return status; } // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to // modify the other triangle. 
status = PadCopyTransposeMatrix(n, n, c_ld, c_offset, c_buffer, n_ceiled, n_ceiled, n_ceiled, 0, temp_c, - c_rotated, false, true, false, false, false, program); + program, true, c_rotated, false); if (ErrorIn(status)) { return status; } // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary @@ -145,7 +145,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons auto lower = (triangle == Triangle::kLower); status = PadCopyTransposeMatrix(n_ceiled, n_ceiled, n_ceiled, 0, temp_c, n, n, c_ld, c_offset, c_buffer, - c_rotated, false, false, upper, lower, false, program); + program, false, c_rotated, false, upper, lower, false); if (ErrorIn(status)) { return status; } // Successfully finished the computation diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index bb952410..e10b7689 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -87,14 +87,14 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const // fill it up until it reaches a certain multiple of size (kernel parameter dependent). status = PadCopyTransposeMatrix(a_one, a_two, a_ld, a_offset, a_buffer, n_ceiled, k_ceiled, n_ceiled, 0, temp_a, - a_rotated, false, true, false, false, false, program); + program, true, a_rotated, false); if (ErrorIn(status)) { return status; } // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to // modify the other triangle. 
status = PadCopyTransposeMatrix(n, n, c_ld, c_offset, c_buffer, n_ceiled, n_ceiled, n_ceiled, 0, temp_c, - c_rotated, false, true, false, false, false, program); + program, true, c_rotated, false); if (ErrorIn(status)) { return status; } // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary @@ -126,7 +126,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const auto lower = (triangle == Triangle::kLower); status = PadCopyTransposeMatrix(n_ceiled, n_ceiled, n_ceiled, 0, temp_c, n, n, c_ld, c_offset, c_buffer, - c_rotated, false, false, upper, lower, false, program); + program, false, c_rotated, false, upper, lower, false); if (ErrorIn(status)) { return status; } // Successfully finished the computation -- cgit v1.2.3