From 80da67d28bbcff958071befb48ccacac05ebbe49 Mon Sep 17 00:00:00 2001
From: CNugteren
Date: Sat, 19 Sep 2015 17:40:38 +0200
Subject: Added the HPMV routine

---
 src/clblast.cc                  | 30 +++++++++++++------
 src/kernels/level2/xgemv.opencl | 23 +++++++++++++++
 src/routines/level2/xgbmv.cc    |  2 +-
 src/routines/level2/xgemv.cc    |  9 ++++--
 src/routines/level2/xhbmv.cc    |  2 +-
 src/routines/level2/xhemv.cc    |  2 +-
 src/routines/level2/xhpmv.cc    | 64 +++++++++++++++++++++++++++++++++++++++++
 src/routines/level2/xsymv.cc    |  2 +-
 8 files changed, 118 insertions(+), 16 deletions(-)
 create mode 100644 src/routines/level2/xhpmv.cc

(limited to 'src')

diff --git a/src/clblast.cc b/src/clblast.cc
index 3453312f..85c63442 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -31,6 +31,7 @@
 #include "internal/routines/level2/xgbmv.h"
 #include "internal/routines/level2/xhemv.h"
 #include "internal/routines/level2/xhbmv.h"
+#include "internal/routines/level2/xhpmv.h"
 #include "internal/routines/level2/xsymv.h"
 
 // BLAS level-3 includes
@@ -465,15 +466,26 @@ template StatusCode Hbmv<double2>(const Layout, const Triangle,
 
 // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
 template <typename T>
-StatusCode Hpmv(const Layout, const Triangle,
-                const size_t,
-                const T,
-                const cl_mem, const size_t,
-                const cl_mem, const size_t, const size_t,
-                const T,
-                cl_mem, const size_t, const size_t,
-                cl_command_queue*, cl_event*) {
-  return StatusCode::kNotImplemented;
+StatusCode Hpmv(const Layout layout, const Triangle triangle,
+                const size_t n,
+                const T alpha,
+                const cl_mem ap_buffer, const size_t ap_offset,
+                const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                const T beta,
+                cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+                cl_command_queue* queue, cl_event* event) {
+  auto queue_cpp = Queue(*queue);
+  auto event_cpp = Event(*event);
+  auto routine = Xhpmv<T>(queue_cpp, event_cpp);
+  auto status = routine.SetUp();
+  if (status != StatusCode::kSuccess) { return status; }
+  return routine.DoHpmv(layout, triangle,
+                        n,
+                        alpha,
+                        Buffer<T>(ap_buffer), ap_offset,
+                        Buffer<T>(x_buffer), x_offset, x_inc,
+                        beta,
+                        Buffer<T>(y_buffer), y_offset, y_inc);
 }
 template StatusCode Hpmv<float2>(const Layout, const Triangle,
                                  const size_t,
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl
index f0bf4405..f6c4476e 100644
--- a/src/kernels/level2/xgemv.opencl
+++ b/src/kernels/level2/xgemv.opencl
@@ -134,6 +134,29 @@ inline real LoadMatrixA(const __global real* restrict agm, const int x, const in
       }
     }
 
+  // For hermitian packed matrices
+  #elif defined(ROUTINE_HPMV)
+    if (parameter == 1) {
+      if (x <= y) {
+        result = agm[((y+1)*y)/2 + x + a_offset];
+        if (x == y) { result.y = ZERO; }
+      }
+      else {
+        result = agm[((x+1)*x)/2 + y + a_offset];
+        COMPLEX_CONJUGATE(result);
+      }
+    }
+    else {
+      if (x >= y) {
+        result = agm[((2*a_ld-(y+1))*y)/2 + x + a_offset];
+        if (x == y) { result.y = ZERO; }
+      }
+      else {
+        result = agm[((2*a_ld-(x+1))*x)/2 + y + a_offset];
+        COMPLEX_CONJUGATE(result);
+      }
+    }
+
   // For symmetric matrices
   #elif defined(ROUTINE_SYMV)
     if ((parameter == 0 && y <= x) || (parameter == 1 && x <= y)) {
diff --git a/src/routines/level2/xgbmv.cc b/src/routines/level2/xgbmv.cc
index d8815542..14d391ca 100644
--- a/src/routines/level2/xgbmv.cc
+++ b/src/routines/level2/xgbmv.cc
@@ -52,7 +52,7 @@ StatusCode Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose,
                 x_buffer, x_offset, x_inc, beta,
                 y_buffer, y_offset, y_inc,
                 fast_kernels, fast_kernels,
-                0, kl_real, ku_real);
+                0, false, kl_real, ku_real);
 }
 
 // =================================================================================================
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 0c312756..1b768dcd 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -55,7 +55,7 @@ StatusCode Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose,
                 x_buffer, x_offset, x_inc, beta,
                 y_buffer, y_offset, y_inc,
                 true, true,
-                0, 0, 0); // N/A for this routine
+                0, false, 0, 0); // N/A for this routine
 }
 
 // =================================================================================================
@@ -69,7 +69,8 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
                             const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
                             const T beta,
                             const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
-                            bool fast_kernel, bool fast_kernel_rot, const size_t parameter,
+                            bool fast_kernel, bool fast_kernel_rot,
+                            const size_t parameter, const bool packed,
                             const size_t kl, const size_t ku) {
 
   // Makes sure all dimensions are larger than zero
@@ -97,7 +98,9 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
   auto a_conjugate = (a_transpose == Transpose::kConjugate);
 
   // Tests the matrix and the vectors for validity
-  auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T));
+  auto status = StatusCode::kSuccess;
+  if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); }
+  else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); }
   if (ErrorIn(status)) { return status; }
   status = TestVectorX(n_real, x_buffer, x_offset, x_inc, sizeof(T));
   if (ErrorIn(status)) { return status; }
diff --git a/src/routines/level2/xhbmv.cc b/src/routines/level2/xhbmv.cc
index 90c0c787..f59a7cb3 100644
--- a/src/routines/level2/xhbmv.cc
+++ b/src/routines/level2/xhbmv.cc
@@ -51,7 +51,7 @@ StatusCode Xhbmv<T>::DoHbmv(const Layout layout, const Triangle triangle,
                 x_buffer, x_offset, x_inc, beta,
                 y_buffer, y_offset, y_inc,
                 fast_kernels, fast_kernels,
-                is_upper, k, 0);
+                is_upper, false, k, 0);
 }
 
 // =================================================================================================
diff --git a/src/routines/level2/xhemv.cc b/src/routines/level2/xhemv.cc
index efad829b..5a58b28b 100644
--- a/src/routines/level2/xhemv.cc
+++ b/src/routines/level2/xhemv.cc
@@ -51,7 +51,7 @@ StatusCode Xhemv<T>::DoHemv(const Layout layout, const Triangle triangle,
                 x_buffer, x_offset, x_inc, beta,
                 y_buffer, y_offset, y_inc,
                 fast_kernels, fast_kernels,
-                is_upper, 0, 0);
+                is_upper, false, 0, 0);
 }
 
 // =================================================================================================
diff --git a/src/routines/level2/xhpmv.cc b/src/routines/level2/xhpmv.cc
new file mode 100644
index 00000000..2269255d
--- /dev/null
+++ b/src/routines/level2/xhpmv.cc
@@ -0,0 +1,64 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren
+//
+// This file implements the Xhpmv class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "internal/routines/level2/xhpmv.h"
+
+#include <string>
+#include <vector>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: forwards to base class constructor
+template <typename T>
+Xhpmv<T>::Xhpmv(Queue &queue, Event &event, const std::string &name):
+    Xgemv<T>(queue, event, name) {
+}
+
+// =================================================================================================
+
+// The main routine
+template <typename T>
+StatusCode Xhpmv<T>::DoHpmv(const Layout layout, const Triangle triangle,
+                            const size_t n,
+                            const T alpha,
+                            const Buffer<T> &ap_buffer, const size_t ap_offset,
+                            const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                            const T beta,
+                            const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+
+  // The data is either in the upper or lower triangle
+  size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
+                     (triangle == Triangle::kLower && layout == Layout::kRowMajor));
+
+  // Runs the generic matrix-vector multiplication, disabling the use of fast vectorized kernels.
+  // The specific hermitian packed matrix-accesses are implemented in the kernel guarded by the
+  // ROUTINE_HPMV define.
+  bool fast_kernels = false;
+  return MatVec(layout, Transpose::kNo,
+                n, n, alpha,
+                ap_buffer, ap_offset, n,
+                x_buffer, x_offset, x_inc, beta,
+                y_buffer, y_offset, y_inc,
+                fast_kernels, fast_kernels,
+                is_upper, true, 0, 0);
+}
+
+// =================================================================================================
+
+// Compiles the templated class
+template class Xhpmv<float2>;
+template class Xhpmv<double2>;
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/routines/level2/xsymv.cc b/src/routines/level2/xsymv.cc
index 458d7385..ec12324b 100644
--- a/src/routines/level2/xsymv.cc
+++ b/src/routines/level2/xsymv.cc
@@ -51,7 +51,7 @@ StatusCode Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle,
                 x_buffer, x_offset, x_inc, beta,
                 y_buffer, y_offset, y_inc,
                 fast_kernels, fast_kernels,
-                is_upper, 0, 0);
+                is_upper, false, 0, 0);
 }
 
 // =================================================================================================
--
cgit v1.2.3