From 10205d773e1477fdd634dbc7e224cc71361a9885 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 14 Apr 2017 20:16:10 +0200 Subject: Added a new Xaxpy kernel in between the regular and fast version in --- src/kernels/level1/xaxpy.opencl | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'src/kernels') diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl index f44bbce0..d30d4e55 100644 --- a/src/kernels/level1/xaxpy.opencl +++ b/src/kernels/level1/xaxpy.opencl @@ -36,12 +36,31 @@ void Xaxpy(const int n, const real_arg arg_alpha, } } +// Faster version of the kernel without offsets and strided accesses but with an if-statement. Also +// assumes that 'n' is divisible by 'VW' and 'WPT'. +__kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +void XaxpyFaster(const int n, const real_arg arg_alpha, + const __global realV* restrict xgm, + __global realV* ygm) { + const real alpha = GetRealArg(arg_alpha); + + if (get_global_id(0) < n / (VW)) { + #pragma unroll + for (int w=0; w