summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author cnugteren <web@cedricnugteren.nl> 2016-05-08 17:30:31 +0200
committer cnugteren <web@cedricnugteren.nl> 2016-05-08 17:30:31 +0200
commit 25a25dbd6f6065420392e59c726902e05c0d4a5a (patch)
tree 6ee66d271b65da7a70ec264d9461aff625fec094 /src
parent 1acb31896c2e6cabea2b2d8fe9511d3726743b54 (diff)
Fixed errors in xAXPY and xSCAL tests on AMD hardware
Diffstat (limited to 'src')
-rw-r--r-- src/kernels/level1/xaxpy.opencl 7
-rw-r--r-- src/kernels/level1/xscal.opencl 6
2 files changed, 9 insertions, 4 deletions
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index 1f1e8ce0..574beb43 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -30,7 +30,8 @@ __kernel void Xaxpy(const int n, const real alpha,
// Loops over the work that needs to be done (allows for an arbitrary number of threads)
#pragma unroll
for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
- MultiplyAdd(ygm[id*y_inc + y_offset], alpha, xgm[id*x_inc + x_offset]);
+ real xvalue = xgm[id*x_inc + x_offset];
+ MultiplyAdd(ygm[id*y_inc + y_offset], alpha, xvalue);
}
}
@@ -45,7 +46,9 @@ __kernel void XaxpyFast(const int n, const real alpha,
#pragma unroll
for (int w=0; w<WPT; ++w) {
const int id = w*get_global_size(0) + get_global_id(0);
- ygm[id] = MultiplyAddVector(ygm[id], alpha, xgm[id]);
+ realV xvalue = xgm[id];
+ realV yvalue = ygm[id];
+ ygm[id] = MultiplyAddVector(yvalue, alpha, xvalue);
}
}
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 956de3c0..59936776 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -29,8 +29,9 @@ __kernel void Xscal(const int n, const real alpha,
// Loops over the work that needs to be done (allows for an arbitrary number of threads)
#pragma unroll
for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
+ real xvalue = xgm[id*x_inc + x_offset];
real result;
- Multiply(result, alpha, xgm[id*x_inc + x_offset]);
+ Multiply(result, alpha, xvalue);
xgm[id*x_inc + x_offset] = result;
}
}
@@ -45,8 +46,9 @@ __kernel void XscalFast(const int n, const real alpha,
#pragma unroll
for (int w=0; w<WPT; ++w) {
const int id = w*get_global_size(0) + get_global_id(0);
+ realV xvalue = xgm[id];
realV result;
- result = MultiplyVector(result, alpha, xgm[id]);
+ result = MultiplyVector(result, alpha, xvalue);
xgm[id] = result;
}
}