diff options
Diffstat (limited to 'src/kernels/level1/xhad.opencl')
-rw-r--r-- | src/kernels/level1/xhad.opencl | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/src/kernels/level1/xhad.opencl b/src/kernels/level1/xhad.opencl index 24e0c76c..47bb5170 100644 --- a/src/kernels/level1/xhad.opencl +++ b/src/kernels/level1/xhad.opencl @@ -66,7 +66,11 @@ INLINE_FUNC realV MultiplyVectorVector(realV cvec, const realV aval, const realV // ================================================================================================= // Full version of the kernel with offsets and strided accesses -__kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#endif void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta, const __global real* restrict xgm, const int x_offset, const int x_inc, const __global real* restrict ygm, const int y_offset, const int y_inc, @@ -90,7 +94,11 @@ void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta, // Faster version of the kernel without offsets and strided accesses but with if-statement. Also // assumes that 'n' is dividable by 'VW' and 'WPT'. -__kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#endif void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta, const __global realV* restrict xgm, const __global realV* restrict ygm, __global realV* zgm) { @@ -117,7 +125,11 @@ void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta, // Faster version of the kernel without offsets and strided accesses. Also assumes that 'n' is // dividable by 'VW', 'WGS' and 'WPT'. -__kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) +#endif void XhadFastest(const int n, const real_arg arg_alpha, const real_arg arg_beta, const __global realV* restrict xgm, const __global realV* restrict ygm, __global realV* zgm) { |