summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-06-16 07:43:19 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-06-16 07:43:19 +0200
commit 8f01c644b5c62958c1dcd4fd72b411f3805b81a6 (patch)
tree d3e5e937904a5206c503769c38cc11912b12a3ad /src/kernels
parent 9e2fba9ab9cab1f94dfe143fc6e163f47b6d6f39 (diff)
Added support for complex conjugate transpose
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/common.opencl 7
-rw-r--r-- src/kernels/pad.opencl 4
-rw-r--r-- src/kernels/padtranspose.opencl 6
3 files changed, 15 insertions, 2 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 154265e4..818c725f 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -112,6 +112,13 @@ R"(
#define AXPBY(e, a, b, c, d) e = a*b + c*d
#endif
+// In-place complex conjugate of 'value' (negates .y) when PRECISION is 3232 or 6464; otherwise a no-op
+#if PRECISION == 3232 || PRECISION == 6464
+ #define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y // two statements: only use inside braces
+#else
+ #define COMPLEX_CONJUGATE(value) value = value // self-assignment: leaves 'value' unchanged
+#endif
+
// =================================================================================================
// End of the C++11 raw string literal
diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl
index ccaeb9d6..45eaef91 100644
--- a/src/kernels/pad.opencl
+++ b/src/kernels/pad.opencl
@@ -47,7 +47,8 @@ __kernel void PadMatrix(const int src_one, const int src_two,
__global const real* restrict src,
const int dest_one, const int dest_two,
const int dest_ld, const int dest_offset,
- __global real* dest) {
+ __global real* dest,
+ const int do_conjugate) {
// Loops over the work per thread in both dimensions
#pragma unroll
@@ -67,6 +68,7 @@ __kernel void PadMatrix(const int src_one, const int src_two,
}
// Stores the value in the destination matrix
+ if (do_conjugate == 1) { COMPLEX_CONJUGATE(value); }
dest[id_two*dest_ld + id_one + dest_offset] = value;
}
}
diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl
index 67cbf341..2f2aabd6 100644
--- a/src/kernels/padtranspose.opencl
+++ b/src/kernels/padtranspose.opencl
@@ -40,7 +40,8 @@ __kernel void PadTransposeMatrix(const int src_one, const int src_two,
__global const real* restrict src,
const int dest_one, const int dest_two,
const int dest_ld, const int dest_offset,
- __global real* dest) {
+ __global real* dest,
+ const int do_conjugate) {
// Local memory to store a tile of the matrix (for coalescing)
__local real tile[PADTRA_WPT*PADTRA_TILE][PADTRA_WPT*PADTRA_TILE + PADTRA_PAD];
@@ -83,12 +84,15 @@ __kernel void PadTransposeMatrix(const int src_one, const int src_two,
// Stores the transposed value in the destination matrix
if ((id_dest_one < dest_one) && (id_dest_two < dest_two)) {
real value = tile[get_local_id(0)*PADTRA_WPT + w_two][get_local_id(1)*PADTRA_WPT + w_one];
+ if (do_conjugate == 1) { COMPLEX_CONJUGATE(value); }
dest[id_dest_two*dest_ld + id_dest_one + dest_offset] = value;
}
}
}
}
+// =================================================================================================
+
// Same as UnPadCopyMatrix, but now also does the transpose
__attribute__((reqd_work_group_size(PADTRA_TILE, PADTRA_TILE, 1)))
__kernel void UnPadTransposeMatrix(const int src_one, const int src_two,