summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2022-05-16 08:38:35 +0200
committer: GitHub <noreply@github.com> 2022-05-16 08:38:35 +0200
commit: 1884158128bbc7df5651947e903e763e162f82d8 (patch)
tree: 1dfdf4c6127f2334208b368fb164d45a8fe4ef2a
parent: cb43f264cb7b51700cc2e1ae4a8262af7e99ac7d (diff)
parent: fc238a96c9d2049a380b9c09e356ac2876f9a735 (diff)
Merge pull request #432 from justingra/sum-fix
sum fix
-rw-r--r-- CHANGELOG 1
-rw-r--r-- src/kernels/level1/xasum.opencl 2
2 files changed, 2 insertions, 1 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 424cab04..8ae1874e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ Development version (next version)
- Fix a correctness issue with DGEMM on SM 7.5 Turing GPUs
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see doc/tuning.md)
+- Changed the complex sum routine to return the complex sum instead of the absolute complex sum.
Version 1.5.2
- Changed XAMAX/XAMIN to more likely return first rather than last min/max index, updated API docs
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 29e7fa3e..42e49d4c 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -93,7 +93,7 @@ void XasumEpilogue(const __global real* restrict input,
// Computes the absolute value and stores the final result
if (lid == 0) {
- #if PRECISION == 3232 || PRECISION == 6464
+ #if (PRECISION == 3232 || PRECISION == 6464) && defined(ROUTINE_ASUM)
asum[asum_offset].x = lm[0].x + lm[0].y; // the result is a non-complex number
#else
asum[asum_offset] = lm[0];