diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-02-28 15:49:59 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-02-28 15:49:59 +0100 |
commit | 4a56822dcc7f723db0dc9a86fbb71abdd18cee31 (patch) | |
tree | dde22d90f7bbf4fbec755f4681f8237e8c05cbf2 /src/kernels | |
parent | e3545215a54c096e1c889124a9076cfb2f42df17 (diff) |
Fixed a couple of correctness bugs in the Xher kernels
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level2/level2.opencl | 18 | ||||
-rw-r--r-- | src/kernels/level2/xger.opencl | 4 | ||||
-rw-r--r-- | src/kernels/level2/xher.opencl | 2 |
3 files changed, 16 insertions, 8 deletions
```diff
diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl
index ea7d7afd..ad92595a 100644
--- a/src/kernels/level2/level2.opencl
+++ b/src/kernels/level2/level2.opencl
@@ -34,7 +34,7 @@ R"(
 
 // Returns an element from a vector
 inline real LoadVector(const int id, const int max,
-                       __global real* restrict gm, const int offset, const int inc,
+                       __global real* gm, const int offset, const int inc,
                        const int do_conjugate) {
   if (id < max) {
     real result = gm[id*inc + offset];
@@ -42,7 +42,7 @@ inline real LoadVector(const int id, const int max,
       #if defined(ROUTINE_GERC)
         COMPLEX_CONJUGATE(result);
       #endif
-      #if defined(ROUTINE_HER)
+      #if defined(ROUTINE_HER) || defined(ROUTINE_HPR)
         COMPLEX_CONJUGATE(result);
       #endif
     }
@@ -57,14 +57,22 @@ inline real LoadVector(const int id, const int max,
 
 // Performs the rank-1 matrix update
 inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
-                         __global real* restrict agm, const int a_offset, const int a_ld,
-                         const real alpha, const real xvalue, const real yvalue) {
+                         __global real* agm, const int a_offset, const int a_ld,
+                         const real alpha, const real xvalue, const real yvalue,
+                         const int is_upper) {
 
   // Bounds of a regular matrix
   if (id1 < max1 && id2 < max2) {
 
     #if defined(ROUTINE_SPR) || defined(ROUTINE_HPR)
-      const int a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 + a_offset : ((id1+1)*id1)/2 + id2 + a_offset;
+      int a_index;
+      if (is_upper) {
+        a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2;
+      }
+      else {
+        a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2;
+      }
+      a_index += a_offset;
     #else
       const int a_index = id2*a_ld + id1 + a_offset;
     #endif
diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl
index ce8e04bb..d377fbb0 100644
--- a/src/kernels/level2/xger.opencl
+++ b/src/kernels/level2/xger.opencl
@@ -58,7 +58,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha,
 
         // Loads A, performs the operation, and stores the result into A
         MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld,
-                     alpha, xvalues[w2], yvalues[w1]);
+                     alpha, xvalues[w2], yvalues[w1], false);
       }
     }
   }
@@ -92,7 +92,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha,
 
         // Loads A, performs the operation, and stores the result into A
         MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld,
-                     alpha, xvalues[w1], yvalues[w2]);
+                     alpha, xvalues[w1], yvalues[w2], false);
       }
     }
   }
diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl
index 13bc4135..edb94ca8 100644
--- a/src/kernels/level2/xher.opencl
+++ b/src/kernels/level2/xher.opencl
@@ -59,7 +59,7 @@ __kernel void Xher(const int n, const real alpha,
 
         // Loads A, performs the operation, and stores the result into A
         else {
-          MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1]);
+          MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1], is_upper);
         }
       }
     }
```