summary refs log tree commit diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-10-17 21:15:53 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-10-17 21:15:53 +0200
commit 9a1454496d557c7b0c2daeb84a5506a25c309477 (patch)
tree 7c06c12f6a0b5e804ee234dc0e643374ca08fc8f
parent e33542acddc8ed4e7e8c4b0cfd832a2fe92e6fa9 (diff)
Fixed a bug with the pre-processing and the AXPY kernel
-rw-r--r-- src/kernels/level1/xaxpy.opencl 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index 2829237e..772b57f3 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -43,11 +43,11 @@ void XaxpyFaster(const int n, const real_arg arg_alpha,
__global realV* ygm) {
const real alpha = GetRealArg(arg_alpha);
- const int num_worker_threads = n / (VW * WPT);
- if (get_global_id(0) < num_worker_threads) {
+ const int num_usefull_threads = n / (VW * WPT);
+ if (get_global_id(0) < num_usefull_threads) {
#pragma unroll
for (int _w = 0; _w < WPT; _w += 1) {
- const int id = _w*num_worker_threads + get_global_id(0);
+ const int id = _w*num_usefull_threads + get_global_id(0);
realV xvalue = xgm[id];
realV yvalue = ygm[id];
ygm[id] = MultiplyAddVector(yvalue, alpha, xvalue);