diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-11-25 17:46:01 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-11-25 17:46:01 +0100 |
commit | 69aa3b35ed499b5ba509d25ece97a24b66a456d9 (patch) | |
tree | 1cba91b8fcfb8a3da977420781d29e190729aa02 /src/kernels/level1 | |
parent | f01bcded1e34e3b031e78cee357d1c1e0f1aa5be (diff) |
Implemented first simple pre-processor: defines parser and loop unrolling based on assumptions
Diffstat (limited to 'src/kernels/level1')
-rw-r--r-- | src/kernels/level1/xaxpy.opencl | 10 |
1 files changed, 4 insertions, 6 deletions
```diff
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index d30d4e55..3a574ec2 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -29,8 +29,7 @@ void Xaxpy(const int n, const real_arg arg_alpha,
   const real alpha = GetRealArg(arg_alpha);
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
-  for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
+  for (int id = get_global_id(0); id < n; id += get_global_size(0)) {
     real xvalue = xgm[id*x_inc + x_offset];
     MultiplyAdd(ygm[id*y_inc + y_offset], alpha, xvalue);
   }
@@ -46,7 +45,7 @@ void XaxpyFaster(const int n, const real_arg arg_alpha,
 
   if (get_global_id(0) < n / (VW)) {
     #pragma unroll
-    for (int w=0; w<WPT; ++w) {
+    for (int w = 0; w < WPT; w += 1) {
       const int id = w*get_global_size(0) + get_global_id(0);
       realV xvalue = xgm[id];
       realV yvalue = ygm[id];
@@ -64,7 +63,7 @@ void XaxpyFastest(const int n, const real_arg arg_alpha,
   const real alpha = GetRealArg(arg_alpha);
 
   #pragma unroll
-  for (int w=0; w<WPT; ++w) {
+  for (int w = 0; w < WPT; w += 1) {
     const int id = w*get_global_size(0) + get_global_id(0);
     realV xvalue = xgm[id];
     realV yvalue = ygm[id];
@@ -83,8 +82,7 @@ void XaxpyBatched(const int n, const __constant real_arg* arg_alphas,
   const real alpha = GetRealArg(arg_alphas[batch]);
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
-  for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
+  for (int id = get_global_id(0); id < n; id += get_global_size(0)) {
     real xvalue = xgm[id*x_inc + x_offsets[batch]];
     MultiplyAdd(ygm[id*y_inc + y_offsets[batch]], alpha, xvalue);
   }
```