summary refs log tree commit diff
path: root/src/kernels/level2
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-02-05 14:36:31 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-02-05 14:36:31 +0100
commit e7cbb5915aef16f3a64566292459eaede5a600e5 (patch)
tree 8a18e7018b1922d9d445eede6af7d5140f33dc71 /src/kernels/level2
parent c209dd7af90d604c8210cc5680b6c7a50b2b995f (diff)
Fixed complex version of the TRSV kernel
Diffstat (limited to 'src/kernels/level2')
-rw-r--r-- src/kernels/level2/xtrsv.opencl 16
1 files changed, 12 insertions, 4 deletions
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl
index fd5de200..ebea77a3 100644
--- a/src/kernels/level2/xtrsv.opencl
+++ b/src/kernels/level2/xtrsv.opencl
@@ -55,7 +55,6 @@ void trsv_forward(int n,
if (is_transposed == 0) {
for (int i = 0; i < n; ++i) {
alm[i][tid] = A[i + tid*a_ld + a_offset];
- if (do_conjugate) { COMPLEX_CONJUGATE(alm[i][tid]); }
}
}
else {
@@ -63,6 +62,11 @@ void trsv_forward(int n,
alm[i][tid] = A[tid + i*a_ld + a_offset];
}
}
+ if (do_conjugate) {
+ for (int i = 0; i < n; ++i) {
+ COMPLEX_CONJUGATE(alm[i][tid]);
+ }
+ }
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -72,7 +76,7 @@ void trsv_forward(int n,
for (int j = 0; j < i; ++j) {
MultiplySubtract(xlm[i], alm[i][j], xlm[j]);
}
- if (is_unit_diagonal == 0) { DivideReal(xlm[i], xlm[i], alm[i][i]); }
+ if (is_unit_diagonal == 0) { DivideFull(xlm[i], xlm[i], alm[i][i]); }
}
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -99,7 +103,6 @@ void trsv_backward(int n,
if (is_transposed == 0) {
for (int i = 0; i < n; ++i) {
alm[i][tid] = A[i + tid*a_ld + a_offset];
- if (do_conjugate) { COMPLEX_CONJUGATE(alm[i][tid]); }
}
}
else {
@@ -107,6 +110,11 @@ void trsv_backward(int n,
alm[i][tid] = A[tid + i*a_ld + a_offset];
}
}
+ if (do_conjugate) {
+ for (int i = 0; i < n; ++i) {
+ COMPLEX_CONJUGATE(alm[i][tid]);
+ }
+ }
}
barrier(CLK_LOCAL_MEM_FENCE);
@@ -116,7 +124,7 @@ void trsv_backward(int n,
for (int j = i + 1; j < n; ++j) {
MultiplySubtract(xlm[i], alm[i][j], xlm[j]);
}
- if (is_unit_diagonal == 0) { DivideReal(xlm[i], xlm[i], alm[i][i]); }
+ if (is_unit_diagonal == 0) { DivideFull(xlm[i], xlm[i], alm[i][i]); }
}
}
barrier(CLK_LOCAL_MEM_FENCE);