From aebd156869738f88c21a78a8df27e391e67da39b Mon Sep 17 00:00:00 2001 From: CNugteren Date: Sat, 19 Sep 2015 11:11:34 +0200 Subject: Added the HBMV routine --- src/kernels/level2/xgemv.opencl | 77 ++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 25 deletions(-) (limited to 'src/kernels/level2/xgemv.opencl') diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl index 0ecfc960..f0bf4405 100644 --- a/src/kernels/level2/xgemv.opencl +++ b/src/kernels/level2/xgemv.opencl @@ -82,38 +82,65 @@ R"( // Defines how to load the input matrix in the non-vectorized case inline real LoadMatrixA(const __global real* restrict agm, const int x, const int y, - const int a_ld, const int a_offset, const int reversed, + const int a_ld, const int a_offset, const int parameter, const int kl, const int ku) { real result; - // For symmetric matrices - #if defined(ROUTINE_SYMV) - if ((reversed == 0 && y <= x) || (reversed == 1 && x <= y)) { - result = agm[y*a_ld + x + a_offset]; - } - else { - result = agm[x*a_ld + y + a_offset]; - } + // For banded matrices + #if defined(ROUTINE_GBMV) + const int k = ku - y; + if (x >= y-ku && x < y+kl+1) { result = agm[a_ld*y + k + x + a_offset]; } + else { SetToZero(result); } // For hermitian matrices #elif defined(ROUTINE_HEMV) - if ((reversed == 0 && y <= x) || (reversed == 1 && x <= y)) { - result = agm[y*a_ld + x + a_offset]; + if ((parameter == 0 && y <= x) || (parameter == 1 && x <= y)) { + result = agm[a_ld*y + x + a_offset]; if (x == y) { result.y = ZERO; } } else { - result = agm[x*a_ld + y + a_offset]; + result = agm[a_ld*x + y + a_offset]; COMPLEX_CONJUGATE(result); } - // For banded matrices - #elif defined(ROUTINE_GBMV) - const int k = ku-y+x; - if (x >= y-ku && x < y+kl+1) { - result = agm[a_ld*y + k + a_offset]; + // For hermitian banded matrices + #elif defined(ROUTINE_HBMV) + if (parameter == 1) { + if (x <= y) { + const int m = kl - y; + if (x >= y-kl && x <= y) { result = agm[a_ld*y + m + x + a_offset]; } + else { SetToZero(result); } + if (x == y) { result.y = ZERO; } + } + else { + const int m = kl - x; + if (y >= x-kl && y <= x) { result = agm[a_ld*x + m + y + a_offset]; } + else { SetToZero(result); } + COMPLEX_CONJUGATE(result); + } + } + else { + if (x >= y) { + const int m = -y; + if (x >= y && x < y+kl+1) { result = agm[a_ld*y + m + x + a_offset]; } + else { SetToZero(result); } + if (x == y) { result.y = ZERO; } + } + else { + const int m = -x; + if (y >= x && y < x+kl+1) { result = agm[a_ld*x + m + y + a_offset]; } + else { SetToZero(result); } + COMPLEX_CONJUGATE(result); + } + } + + // For symmetric matrices + #elif defined(ROUTINE_SYMV) + if ((parameter == 0 && y <= x) || (parameter == 1 && x <= y)) { + result = agm[y*a_ld + x + a_offset]; } else { - SetToZero(result); + result = agm[x*a_ld + y + a_offset]; } // For general matrices @@ -145,7 +172,7 @@ __kernel void Xgemv(const int m, const int n, const real alpha, const real beta, const __global real* restrict agm, const int a_offset, const int a_ld, const __global real* restrict xgm, const int x_offset, const int x_inc, __global real* ygm, const int y_offset, const int y_inc, - const int do_conjugate, const int reversed, + const int do_conjugate, const int parameter, const int kl, const int ku) { // Local memory for the vector X @@ -183,7 +210,7 @@ __kernel void Xgemv(const int m, const int n, const real alpha, const real beta, #pragma unroll for (int kloop=0; kloop