From 80da67d28bbcff958071befb48ccacac05ebbe49 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 19 Sep 2015 17:40:38 +0200 Subject: Added the HPMV routine --- src/routines/level2/xgbmv.cc | 2 +- src/routines/level2/xgemv.cc | 9 ++++--- src/routines/level2/xhbmv.cc | 2 +- src/routines/level2/xhemv.cc | 2 +- src/routines/level2/xhpmv.cc | 64 ++++++++++++++++++++++++++++++++++++++++++++ src/routines/level2/xsymv.cc | 2 +- 6 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 src/routines/level2/xhpmv.cc (limited to 'src/routines') diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cc index d8815542..14d391ca 100644 --- a/src/routines/level2/xgbmv.cc +++ b/src/routines/level2/xgbmv.cc @@ -52,7 +52,7 @@ StatusCode Xgbmv::DoGbmv(const Layout layout, const Transpose a_transpose, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - 0, kl_real, ku_real); + 0, false, kl_real, ku_real); } // ================================================================================================= diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index 0c312756..1b768dcd 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -55,7 +55,7 @@ StatusCode Xgemv::DoGemv(const Layout layout, const Transpose a_transpose, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, true, true, - 0, 0, 0); // N/A for this routine + 0, false, 0, 0); // N/A for this routine } // ================================================================================================= @@ -69,7 +69,8 @@ StatusCode Xgemv::MatVec(const Layout layout, const Transpose a_transpose, const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, const T beta, const Buffer &y_buffer, const size_t y_offset, const size_t y_inc, - bool fast_kernel, bool fast_kernel_rot, const size_t parameter, + bool fast_kernel, bool fast_kernel_rot, + const size_t parameter, const bool packed, const size_t kl, const size_t ku) { // Makes sure all dimensions are larger than zero @@ -97,7 +98,9 @@ StatusCode Xgemv::MatVec(const Layout layout, const Transpose a_transpose, auto a_conjugate = (a_transpose == Transpose::kConjugate); // Tests the matrix and the vectors for validity - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = StatusCode::kSuccess; + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } + else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); } if (ErrorIn(status)) { return status; } status = TestVectorX(n_real, x_buffer, x_offset, x_inc, sizeof(T)); if (ErrorIn(status)) { return status; } diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cc index 90c0c787..f59a7cb3 100644 --- a/src/routines/level2/xhbmv.cc +++ b/src/routines/level2/xhbmv.cc @@ -51,7 +51,7 @@ StatusCode Xhbmv::DoHbmv(const Layout layout, const Triangle triangle, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - is_upper, k, 0); + is_upper, false, k, 0); } // ================================================================================================= diff --git a/src/routines/level2/xhemv.cc b/src/routines/level2/xhemv.cc index efad829b..5a58b28b 100644 --- a/src/routines/level2/xhemv.cc +++ b/src/routines/level2/xhemv.cc @@ -51,7 +51,7 @@ StatusCode Xhemv::DoHemv(const Layout layout, const Triangle triangle, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - is_upper, 0, 0); + is_upper, false, 0, 0); } // ================================================================================================= diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cc new file mode 100644 index 00000000..2269255d --- /dev/null +++ b/src/routines/level2/xhpmv.cc @@ -0,0 +1,64 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements the Xhpmv class (see the header for information about the class). +// +// ================================================================================================= + +#include "internal/routines/level2/xhpmv.h" + +#include +#include + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template +Xhpmv::Xhpmv(Queue &queue, Event &event, const std::string &name): + Xgemv(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template +StatusCode Xhpmv::DoHpmv(const Layout layout, const Triangle triangle, + const size_t n, + const T alpha, + const Buffer &ap_buffer, const size_t ap_offset, + const Buffer &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer &y_buffer, const size_t y_offset, const size_t y_inc) { + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels. + // The specific hermitian packed matrix-accesses are implemented in the kernel guarded by the + // ROUTINE_HPMV define. + bool fast_kernels = false; + return MatVec(layout, Transpose::kNo, + n, n, alpha, + ap_buffer, ap_offset, n, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + fast_kernels, fast_kernels, + is_upper, true, 0, 0); +} + +// ================================================================================================= + +// Compiles the templated class +template class Xhpmv; +template class Xhpmv; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cc index 458d7385..ec12324b 100644 --- a/src/routines/level2/xsymv.cc +++ b/src/routines/level2/xsymv.cc @@ -51,7 +51,7 @@ StatusCode Xsymv::DoSymv(const Layout layout, const Triangle triangle, x_buffer, x_offset, x_inc, beta, y_buffer, y_offset, y_inc, fast_kernels, fast_kernels, - is_upper, 0, 0); + is_upper, false, 0, 0); } // ================================================================================================= -- cgit v1.2.3