Added non-absolute counterparts xSUM and IxMAX of the BLAS routines xASUM and IxAMAX

author: Cedric Nugteren <web@cedricnugteren.nl> 2016-04-27 18:07:30 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-04-27 18:07:30 +0200
commit: d7ddbdeb1f416f56bc469d16c051551207274703 (patch)
tree: 2f5ba3abc5a97509b84ecdd1fdf5b449ab543eec /src/kernels
parent: 13eed1a0f973ff2090062a1ad4485896b22949b0 (diff)
3 files changed, 20 insertions, 4 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 57d75ee0..d401744d 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -40,6 +40,7 @@ R"(
   typedef float16 real16;
   #define ZERO 0.0f
   #define ONE 1.0f
+  #define SMALLEST -1.0e37f
 
 // Double-precision 
 #elif PRECISION == 64
@@ -50,6 +51,7 @@ R"(
   typedef double16 real16;
   #define ZERO 0.0
   #define ONE 1.0
+  #define SMALLEST -1.0e37
 
 // Complex single-precision
 #elif PRECISION == 3232
@@ -64,6 +66,7 @@ R"(
                            real sC; real sD; real sE; real sF;} real16;
   #define ZERO 0.0f
   #define ONE 1.0f
+  #define SMALLEST -1.0e37f
 
 // Complex Double-precision
 #elif PRECISION == 6464
@@ -78,6 +81,7 @@ R"(
                             real sC; real sD; real sE; real sF;} real16;
   #define ZERO 0.0
   #define ONE 1.0
+  #define SMALLEST -1.0e37
 #endif
 
 // Single-element version of a complex number
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 03dd05e5..58b75ce2 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -41,14 +41,23 @@ __kernel void Xamax(const int n,
   const int num_groups = get_num_groups(0);
 
   // Performs loading and the first steps of the reduction
-  singlereal max = ZERO;
+  #if defined(ROUTINE_MAX) // non-absolute version
+    singlereal max = SMALLEST;
+  #else
+    singlereal max = ZERO;
+  #endif
   unsigned int imax = 0;
   int id = wgid*WGS1 + lid;
   while (id < n) {
+    const int x_index = id*x_inc + x_offset;
     #if PRECISION == 3232 || PRECISION == 6464
-      singlereal x = fabs(xgm[id*x_inc + x_offset].x);
+      singlereal x = xgm[x_index].x;
     #else
-      singlereal x = fabs(xgm[id*x_inc + x_offset]);
+      singlereal x = xgm[x_index];
+    #endif
+    #if defined(ROUTINE_MAX) // non-absolute version
+    #else
+      x = fabs(x);
     #endif
     if (x >= max) {
       max = x;
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 037dc57e..58d0f11b 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -45,7 +45,10 @@ __kernel void Xasum(const int n,
   int id = wgid*WGS1 + lid;
   while (id < n) {
     real x = xgm[id*x_inc + x_offset];
-    AbsoluteValue(x);
+    #if defined(ROUTINE_SUM) // non-absolute version
+    #else
+      AbsoluteValue(x);
+    #endif
     Add(acc, acc, x);
     id += WGS1*num_groups;
   }
author	Cedric Nugteren <web@cedricnugteren.nl>	2016-04-27 18:07:30 +0200
committer	Cedric Nugteren <web@cedricnugteren.nl>	2016-04-27 18:07:30 +0200
commit	d7ddbdeb1f416f56bc469d16c051551207274703 (patch)
tree	2f5ba3abc5a97509b84ecdd1fdf5b449ab543eec /src/kernels
parent	13eed1a0f973ff2090062a1ad4485896b22949b0 (diff)