summaryrefslogtreecommitdiff
path: root/src/kernels
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-07-07 07:34:36 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-07-07 07:34:36 +0200
commit 599f9a70a6bb2388c27b7981276e4d39497f90fb (patch)
tree 0a9c6f07c8fd8fbc1967f67658f3e5be09d0e586 /src/kernels
parent d9ea0c47c65ff41da2d213cce8b0ef434e817ec2 (diff)
Added option to set the imaginary part of the diagonal to zero
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/common.opencl 7
-rw-r--r-- src/kernels/pad.opencl 7
-rw-r--r-- src/kernels/padtranspose.opencl 4
3 files changed, 15 insertions, 3 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 19af0056..0d29c7a6 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -92,6 +92,13 @@ R"(
#define SetToZero(a) a = ZERO
#endif
+// Sets only the imaginary part of a variable to zero (expands to nothing for other precisions)
+#if PRECISION == 3232 || PRECISION == 6464
+ #define ImagToZero(a) a.y = ZERO
+#else
+ #define ImagToZero(a)
+#endif
+
// Sets a variable to one
#if PRECISION == 3232 || PRECISION == 6464
#define SetToOne(a) a.x = ONE; a.y = ZERO
diff --git a/src/kernels/pad.opencl b/src/kernels/pad.opencl
index 8294fab7..f8a89d24 100644
--- a/src/kernels/pad.opencl
+++ b/src/kernels/pad.opencl
@@ -87,7 +87,8 @@ __kernel void UnPadMatrix(const int src_one, const int src_two,
const int dest_one, const int dest_two,
const int dest_ld, const int dest_offset,
__global real* dest,
- const int upper, const int lower) {
+ const int upper, const int lower,
+ const int diagonal_imag_zero) {
// Loops over the work per thread in both dimensions
#pragma unroll
@@ -106,7 +107,9 @@ __kernel void UnPadMatrix(const int src_one, const int src_two,
// Copies the value into the destination matrix. This is always within bounds of the source
// matrix, as we know that the destination matrix is smaller than the source.
if (id_two < dest_two && id_one < dest_one) {
- dest[id_two*dest_ld + id_one + dest_offset] = src[id_two*src_ld + id_one + src_offset];
+ real value = src[id_two*src_ld + id_one + src_offset];
+ if (diagonal_imag_zero == 1 && id_one == id_two) { ImagToZero(value); }
+ dest[id_two*dest_ld + id_one + dest_offset] = value;
}
}
}
diff --git a/src/kernels/padtranspose.opencl b/src/kernels/padtranspose.opencl
index 7e923392..b2b96aa0 100644
--- a/src/kernels/padtranspose.opencl
+++ b/src/kernels/padtranspose.opencl
@@ -101,7 +101,8 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
const int dest_one, const int dest_two,
const int dest_ld, const int dest_offset,
__global real* dest,
- const int upper, const int lower) {
+ const int upper, const int lower,
+ const int diagonal_imag_zero) {
// Local memory to store a tile of the matrix (for coalescing)
__local real tile[PADTRA_WPT*PADTRA_TILE][PADTRA_WPT*PADTRA_TILE + PADTRA_PAD];
@@ -147,6 +148,7 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
// Stores the transposed value in the destination matrix
if ((id_dest_one < dest_one) && (id_dest_two < dest_two)) {
real value = tile[get_local_id(0)*PADTRA_WPT + w_two][get_local_id(1)*PADTRA_WPT + w_one];
+ if (diagonal_imag_zero == 1 && id_dest_one == id_dest_two) { ImagToZero(value); }
dest[id_dest_two*dest_ld + id_dest_one + dest_offset] = value;
}
}