diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-02-05 14:36:31 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-02-05 14:36:31 +0100 |
commit | e7cbb5915aef16f3a64566292459eaede5a600e5 (patch) | |
tree | 8a18e7018b1922d9d445eede6af7d5140f33dc71 /src/kernels/level2 | |
parent | c209dd7af90d604c8210cc5680b6c7a50b2b995f (diff) |
Fixed complex version of the TRSV kernel
Diffstat (limited to 'src/kernels/level2')
-rw-r--r-- | src/kernels/level2/xtrsv.opencl | 16 |
1 file changed, 12 insertions, 4 deletions
```diff
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl
index fd5de200..ebea77a3 100644
--- a/src/kernels/level2/xtrsv.opencl
+++ b/src/kernels/level2/xtrsv.opencl
@@ -55,7 +55,6 @@ void trsv_forward(int n,
     if (is_transposed == 0) {
       for (int i = 0; i < n; ++i) {
         alm[i][tid] = A[i + tid*a_ld + a_offset];
-        if (do_conjugate) { COMPLEX_CONJUGATE(alm[i][tid]); }
       }
     }
     else {
@@ -63,6 +62,11 @@ void trsv_forward(int n,
         alm[i][tid] = A[tid + i*a_ld + a_offset];
       }
     }
+    if (do_conjugate) {
+      for (int i = 0; i < n; ++i) {
+        COMPLEX_CONJUGATE(alm[i][tid]);
+      }
+    }
   }
   barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -72,7 +76,7 @@ void trsv_forward(int n,
       for (int j = 0; j < i; ++j) {
         MultiplySubtract(xlm[i], alm[i][j], xlm[j]);
       }
-      if (is_unit_diagonal == 0) { DivideReal(xlm[i], xlm[i], alm[i][i]); }
+      if (is_unit_diagonal == 0) { DivideFull(xlm[i], xlm[i], alm[i][i]); }
     }
   }
   barrier(CLK_LOCAL_MEM_FENCE);
@@ -99,7 +103,6 @@ void trsv_backward(int n,
     if (is_transposed == 0) {
       for (int i = 0; i < n; ++i) {
         alm[i][tid] = A[i + tid*a_ld + a_offset];
-        if (do_conjugate) { COMPLEX_CONJUGATE(alm[i][tid]); }
       }
     }
     else {
@@ -107,6 +110,11 @@ void trsv_backward(int n,
         alm[i][tid] = A[tid + i*a_ld + a_offset];
       }
     }
+    if (do_conjugate) {
+      for (int i = 0; i < n; ++i) {
+        COMPLEX_CONJUGATE(alm[i][tid]);
+      }
+    }
   }
   barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -116,7 +124,7 @@ void trsv_backward(int n,
       for (int j = i + 1; j < n; ++j) {
         MultiplySubtract(xlm[i], alm[i][j], xlm[j]);
       }
-      if (is_unit_diagonal == 0) { DivideReal(xlm[i], xlm[i], alm[i][i]); }
+      if (is_unit_diagonal == 0) { DivideFull(xlm[i], xlm[i], alm[i][i]); }
     }
   }
   barrier(CLK_LOCAL_MEM_FENCE);
```