diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2022-05-16 08:38:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-16 08:38:35 +0200 |
commit | 1884158128bbc7df5651947e903e763e162f82d8 (patch) | |
tree | 1dfdf4c6127f2334208b368fb164d45a8fe4ef2a | |
parent | cb43f264cb7b51700cc2e1ae4a8262af7e99ac7d (diff) | |
parent | fc238a96c9d2049a380b9c09e356ac2876f9a735 (diff) |
Merge pull request #432 from justingra/sum-fix
sum fix
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | src/kernels/level1/xasum.opencl | 2 |
2 files changed, 2 insertions, 1 deletion
@@ -2,6 +2,7 @@ Development version (next version)
 - Fix a correctness issue with DGEMM on SM 7.5 Turing GPUs
 - Various minor fixes and enhancements
 - Added tuned parameters for various devices (see doc/tuning.md)
+- Changed the complex sum routine to return the complex sum instead of the absolute complex sum.
 
 Version 1.5.2
 - Changed XAMAX/XAMIN to more likely return first rather than last min/max index, updated API docs
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 29e7fa3e..42e49d4c 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -93,7 +93,7 @@ void XasumEpilogue(const __global real* restrict input,
 
   // Computes the absolute value and stores the final result
   if (lid == 0) {
-    #if PRECISION == 3232 || PRECISION == 6464
+    #if (PRECISION == 3232 || PRECISION == 6464) && defined(ROUTINE_ASUM)
       asum[asum_offset].x = lm[0].x + lm[0].y; // the result is a non-complex number
     #else
       asum[asum_offset] = lm[0];