diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-07-07 07:34:36 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-07-07 07:34:36 +0200 |
commit | 599f9a70a6bb2388c27b7981276e4d39497f90fb (patch) | |
tree | 0a9c6f07c8fd8fbc1967f67658f3e5be09d0e586 /src/kernels | |
parent | d9ea0c47c65ff41da2d213cce8b0ef434e817ec2 (diff) |
Added option to set the imaginary part of the diagonal to zero
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/common.opencl | 7 | ||||
-rw-r--r-- | src/kernels/pad.opencl | 7 | ||||
-rw-r--r-- | src/kernels/padtranspose.opencl | 4 |
3 files changed, 15 insertions, 3 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 19af0056..0d29c7a6 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -92,6 +92,13 @@ R"(
   #define SetToZero(a) a = ZERO
 #endif
 
+// Sets a variable to zero (only the imaginary part)
+#if PRECISION == 3232 || PRECISION == 6464
+  #define ImagToZero(a) a.y = ZERO
+#else
+  #define ImagToZero(a)
+#endif
+
 // Sets a variable to one
 #if PRECISION == 3232 || PRECISION == 6464
   #define SetToOne(a) a.x = ONE; a.y = ZERO
diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl
index 8294fab7..f8a89d24 100644
--- a/src/kernels/pad.opencl
+++ b/src/kernels/pad.opencl
@@ -87,7 +87,8 @@ __kernel void UnPadMatrix(const int src_one, const int src_two,
                           const int dest_one, const int dest_two,
                           const int dest_ld, const int dest_offset,
                           __global real* dest,
-                          const int upper, const int lower) {
+                          const int upper, const int lower,
+                          const int diagonal_imag_zero) {
 
   // Loops over the work per thread in both dimensions
   #pragma unroll
@@ -106,7 +107,9 @@ __kernel void UnPadMatrix(const int src_one, const int src_two,
       // Copies the value into the destination matrix. This is always within bounds of the source
       // matrix, as we know that the destination matrix is smaller than the source.
       if (id_two < dest_two && id_one < dest_one) {
-        dest[id_two*dest_ld + id_one + dest_offset] = src[id_two*src_ld + id_one + src_offset];
+        real value = src[id_two*src_ld + id_one + src_offset];
+        if (diagonal_imag_zero == 1 && id_one == id_two) { ImagToZero(value); }
+        dest[id_two*dest_ld + id_one + dest_offset] = value;
       }
     }
   }
diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl
index 7e923392..b2b96aa0 100644
--- a/src/kernels/padtranspose.opencl
+++ b/src/kernels/padtranspose.opencl
@@ -101,7 +101,8 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
                                    const int dest_one, const int dest_two,
                                    const int dest_ld, const int dest_offset,
                                    __global real* dest,
-                                   const int upper, const int lower) {
+                                   const int upper, const int lower,
+                                   const int diagonal_imag_zero) {
 
   // Local memory to store a tile of the matrix (for coalescing)
   __local real tile[PADTRA_WPT*PADTRA_TILE][PADTRA_WPT*PADTRA_TILE + PADTRA_PAD];
@@ -147,6 +148,7 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
         // Stores the transposed value in the destination matrix
         if ((id_dest_one < dest_one) && (id_dest_two < dest_two)) {
           real value = tile[get_local_id(0)*PADTRA_WPT + w_two][get_local_id(1)*PADTRA_WPT + w_one];
+          if (diagonal_imag_zero == 1 && id_dest_one == id_dest_two) { ImagToZero(value); }
           dest[id_dest_two*dest_ld + id_dest_one + dest_offset] = value;
         }
       }
     }