diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-16 07:43:19 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-16 07:43:19 +0200 |
commit | 8f01c644b5c62958c1dcd4fd72b411f3805b81a6 (patch) | |
tree | d3e5e937904a5206c503769c38cc11912b12a3ad /src/kernels | |
parent | 9e2fba9ab9cab1f94dfe143fc6e163f47b6d6f39 (diff) |
Added support for complex conjugate transpose
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/common.opencl | 7 |
-rw-r--r-- | src/kernels/pad.opencl | 4 |
-rw-r--r-- | src/kernels/padtranspose.opencl | 6 |
3 files changed, 15 insertions, 2 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl index 154265e4..818c725f 100644 --- a/src/kernels/common.opencl +++ b/src/kernels/common.opencl @@ -112,6 +112,13 @@ R"( #define AXPBY(e, a, b, c, d) e = a*b + c*d #endif +// The complex conjugate operation for complex transforms +#if PRECISION == 3232 || PRECISION == 6464 + #define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y +#else + #define COMPLEX_CONJUGATE(value) value = value +#endif + // ================================================================================================= // End of the C++11 raw string literal diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl index ccaeb9d6..45eaef91 100644 --- a/src/kernels/pad.opencl +++ b/src/kernels/pad.opencl @@ -47,7 +47,8 @@ __kernel void PadMatrix(const int src_one, const int src_two, __global const real* restrict src, const int dest_one, const int dest_two, const int dest_ld, const int dest_offset, - __global real* dest) { + __global real* dest, + const int do_conjugate) { // Loops over the work per thread in both dimensions #pragma unroll @@ -67,6 +68,7 @@ __kernel void PadMatrix(const int src_one, const int src_two, } // Stores the value in the destination matrix + if (do_conjugate == 1) { COMPLEX_CONJUGATE(value); } dest[id_two*dest_ld + id_one + dest_offset] = value; } } diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl index 67cbf341..2f2aabd6 100644 --- a/src/kernels/padtranspose.opencl +++ b/src/kernels/padtranspose.opencl @@ -40,7 +40,8 @@ __kernel void PadTransposeMatrix(const int src_one, const int src_two, __global const real* restrict src, const int dest_one, const int dest_two, const int dest_ld, const int dest_offset, - __global real* dest) { + __global real* dest, + const int do_conjugate) { // Local memory to store a tile of the matrix (for coalescing) __local real tile[PADTRA_WPT*PADTRA_TILE][PADTRA_WPT*PADTRA_TILE + PADTRA_PAD]; @@ -83,12 +84,15 @@ __kernel void 
PadTransposeMatrix(const int src_one, const int src_two, // Stores the transposed value in the destination matrix if ((id_dest_one < dest_one) && (id_dest_two < dest_two)) { real value = tile[get_local_id(0)*PADTRA_WPT + w_two][get_local_id(1)*PADTRA_WPT + w_one]; + if (do_conjugate == 1) { COMPLEX_CONJUGATE(value); } dest[id_dest_two*dest_ld + id_dest_one + dest_offset] = value; } } } } +// ================================================================================================= + // Same as UnPadCopyMatrix, but now also does the transpose __attribute__((reqd_work_group_size(PADTRA_TILE, PADTRA_TILE, 1))) __kernel void UnPadTransposeMatrix(const int src_one, const int src_two, |