diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-05-08 17:30:31 +0200 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-05-08 17:30:31 +0200 |
commit | 25a25dbd6f6065420392e59c726902e05c0d4a5a (patch) | |
tree | 6ee66d271b65da7a70ec264d9461aff625fec094 /src/kernels | |
parent | 1acb31896c2e6cabea2b2d8fe9511d3726743b54 (diff) |
Fixed errors in xAXPY and xSCAL tests on AMD hardware
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level1/xaxpy.opencl | 7 | ||||
-rw-r--r-- | src/kernels/level1/xscal.opencl | 6 |
2 files changed, 9 insertions, 4 deletions
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index 1f1e8ce0..574beb43 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -30,7 +30,8 @@ __kernel void Xaxpy(const int n, const real alpha,
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
   #pragma unroll
   for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
-    MultiplyAdd(ygm[id*y_inc + y_offset], alpha, xgm[id*x_inc + x_offset]);
+    real xvalue = xgm[id*x_inc + x_offset];
+    MultiplyAdd(ygm[id*y_inc + y_offset], alpha, xvalue);
   }
 }
 
@@ -45,7 +46,9 @@ __kernel void XaxpyFast(const int n, const real alpha,
   #pragma unroll
   for (int w=0; w<WPT; ++w) {
     const int id = w*get_global_size(0) + get_global_id(0);
-    ygm[id] = MultiplyAddVector(ygm[id], alpha, xgm[id]);
+    realV xvalue = xgm[id];
+    realV yvalue = ygm[id];
+    ygm[id] = MultiplyAddVector(yvalue, alpha, xvalue);
   }
 }
 
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 956de3c0..59936776 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -29,8 +29,9 @@ __kernel void Xscal(const int n, const real alpha,
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
   #pragma unroll
   for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
+    real xvalue = xgm[id*x_inc + x_offset];
     real result;
-    Multiply(result, alpha, xgm[id*x_inc + x_offset]);
+    Multiply(result, alpha, xvalue);
     xgm[id*x_inc + x_offset] = result;
   }
 }
@@ -45,8 +46,9 @@ __kernel void XscalFast(const int n, const real alpha,
   #pragma unroll
   for (int w=0; w<WPT; ++w) {
     const int id = w*get_global_size(0) + get_global_id(0);
+    realV xvalue = xgm[id];
     realV result;
-    result = MultiplyVector(result, alpha, xgm[id]);
+    result = MultiplyVector(result, alpha, xvalue);
     xgm[id] = result;
   }
 }