summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-02-28 15:49:59 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-02-28 15:49:59 +0100
commit: 4a56822dcc7f723db0dc9a86fbb71abdd18cee31 (patch)
tree: dde22d90f7bbf4fbec755f4681f8237e8c05cbf2 /src/kernels
parent: e3545215a54c096e1c889124a9076cfb2f42df17 (diff)
Fixed a couple of correctness bugs in the Xher kernels
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level2/level2.opencl 18
-rw-r--r-- src/kernels/level2/xger.opencl 4
-rw-r--r-- src/kernels/level2/xher.opencl 2
3 files changed, 16 insertions, 8 deletions
diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl
index ea7d7afd..ad92595a 100644
--- a/src/kernels/level2/level2.opencl
+++ b/src/kernels/level2/level2.opencl
@@ -34,7 +34,7 @@ R"(
// Returns an element from a vector
inline real LoadVector(const int id, const int max,
- __global real* restrict gm, const int offset, const int inc,
+ __global real* gm, const int offset, const int inc,
const int do_conjugate) {
if (id < max) {
real result = gm[id*inc + offset];
@@ -42,7 +42,7 @@ inline real LoadVector(const int id, const int max,
#if defined(ROUTINE_GERC)
COMPLEX_CONJUGATE(result);
#endif
- #if defined(ROUTINE_HER)
+ #if defined(ROUTINE_HER) || defined(ROUTINE_HPR)
COMPLEX_CONJUGATE(result);
#endif
}
@@ -57,14 +57,22 @@ inline real LoadVector(const int id, const int max,
// Performs the rank-1 matrix update
inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
- __global real* restrict agm, const int a_offset, const int a_ld,
- const real alpha, const real xvalue, const real yvalue) {
+ __global real* agm, const int a_offset, const int a_ld,
+ const real alpha, const real xvalue, const real yvalue,
+ const int is_upper) {
// Bounds of a regular matrix
if (id1 < max1 && id2 < max2) {
#if defined(ROUTINE_SPR) || defined(ROUTINE_HPR)
- const int a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 + a_offset : ((id1+1)*id1)/2 + id2 + a_offset;
+ int a_index;
+ if (is_upper) {
+ a_index = (id1 <= id2) ? ((id2+1)*id2)/2 + id1 : ((id1+1)*id1)/2 + id2;
+ }
+ else {
+ a_index = (id1 >= id2) ? ((2*a_ld-(id2+1))*id2)/2 + id1 : ((2*a_ld-(id1+1))*id1)/2 + id2;
+ }
+ a_index += a_offset;
#else
const int a_index = id2*a_ld + id1 + a_offset;
#endif
diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl
index ce8e04bb..d377fbb0 100644
--- a/src/kernels/level2/xger.opencl
+++ b/src/kernels/level2/xger.opencl
@@ -58,7 +58,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha,
// Loads A, performs the operation, and stores the result into A
MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld,
- alpha, xvalues[w2], yvalues[w1]);
+ alpha, xvalues[w2], yvalues[w1], false);
}
}
}
@@ -92,7 +92,7 @@ __kernel void Xger(const int max1, const int max2, const real alpha,
// Loads A, performs the operation, and stores the result into A
MatrixUpdate(id1, id2, max1, max2, agm, a_offset, a_ld,
- alpha, xvalues[w1], yvalues[w2]);
+ alpha, xvalues[w1], yvalues[w2], false);
}
}
}
diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl
index 13bc4135..edb94ca8 100644
--- a/src/kernels/level2/xher.opencl
+++ b/src/kernels/level2/xher.opencl
@@ -59,7 +59,7 @@ __kernel void Xher(const int n, const real alpha,
// Loads A, performs the operation, and stores the result into A
else {
- MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1]);
+ MatrixUpdate(id1, id2, n, n, agm, a_offset, a_ld, alpha, xvalues[w2], xtvalues[w1], is_upper);
}
}
}