diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-10-17 21:15:53 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-10-17 21:15:53 +0200 |
commit | 9a1454496d557c7b0c2daeb84a5506a25c309477 (patch) | |
tree | 7c06c12f6a0b5e804ee234dc0e643374ca08fc8f | |
parent | e33542acddc8ed4e7e8c4b0cfd832a2fe92e6fa9 (diff) |
Fixed a bug with the pre-processing and the AXPY kernel
-rw-r--r-- | src/kernels/level1/xaxpy.opencl | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index 2829237e..772b57f3 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -43,11 +43,11 @@ void XaxpyFaster(const int n, const real_arg arg_alpha,
                   __global realV* ygm) {
   const real alpha = GetRealArg(arg_alpha);
 
-  const int num_worker_threads = n / (VW * WPT);
-  if (get_global_id(0) < num_worker_threads) {
+  const int num_usefull_threads = n / (VW * WPT);
+  if (get_global_id(0) < num_usefull_threads) {
     #pragma unroll
     for (int _w = 0; _w < WPT; _w += 1) {
-      const int id = _w*num_worker_threads + get_global_id(0);
+      const int id = _w*num_usefull_threads + get_global_id(0);
       realV xvalue = xgm[id];
       realV yvalue = ygm[id];
       ygm[id] = MultiplyAddVector(yvalue, alpha, xvalue);