From 73f49e9b3d4abc4214122e4b8c07a736e01626ee Mon Sep 17 00:00:00 2001 From: "Angus, Alexander" Date: Tue, 17 Jan 2023 08:35:29 -0800 Subject: Updated according to feedback from CNugteren --- src/kernels/level1/xamax.opencl | 4 ++-- src/kernels/level1/xasum.opencl | 4 ++-- src/kernels/level1/xaxpy.opencl | 8 ++++---- src/kernels/level1/xcopy.opencl | 4 ++-- src/kernels/level1/xdot.opencl | 4 ++-- src/kernels/level1/xhad.opencl | 6 +++--- src/kernels/level1/xnrm2.opencl | 4 ++-- src/kernels/level1/xscal.opencl | 4 ++-- src/kernels/level1/xswap.opencl | 4 ++-- 9 files changed, 21 insertions(+), 21 deletions(-) (limited to 'src/kernels/level1') diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl index 3600b9d2..06a6773b 100644 --- a/src/kernels/level1/xamax.opencl +++ b/src/kernels/level1/xamax.opencl @@ -32,7 +32,7 @@ R"( // The main reduction kernel, performing the loading and the majority of the operation #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS1, 1, 1))) #endif void Xamax(const int n, @@ -102,7 +102,7 @@ void Xamax(const int n, // be launched with a single workgroup only. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS2, 1, 1))) #endif void XamaxEpilogue(const __global singlereal* restrict maxgm, diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl index 875221f4..683c6fad 100644 --- a/src/kernels/level1/xasum.opencl +++ b/src/kernels/level1/xasum.opencl @@ -32,7 +32,7 @@ R"( // The main reduction kernel, performing the loading and the majority of the operation #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS1, 1, 1))) #endif void Xasum(const int n, @@ -79,7 +79,7 @@ void Xasum(const int n, // be launched with a single workgroup only. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS2, 1, 1))) #endif void XasumEpilogue(const __global real* restrict input, diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl index b20ad200..a106ed01 100644 --- a/src/kernels/level1/xaxpy.opencl +++ b/src/kernels/level1/xaxpy.opencl @@ -24,7 +24,7 @@ R"( // Full version of the kernel with offsets and strided accesses #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void Xaxpy(const int n, const real_arg arg_alpha, @@ -43,7 +43,7 @@ void Xaxpy(const int n, const real_arg arg_alpha, // assumes that 'n' is dividable by 'VW' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XaxpyFaster(const int n, const real_arg arg_alpha, @@ -67,7 +67,7 @@ void XaxpyFaster(const int n, const real_arg arg_alpha, // dividable by 'VW', 'WGS' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XaxpyFastest(const int n, const real_arg arg_alpha, @@ -89,7 +89,7 @@ void XaxpyFastest(const int n, const real_arg arg_alpha, // Full version of the kernel with offsets and strided accesses: batched version #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XaxpyBatched(const int n, const __constant real_arg* arg_alphas, diff --git a/src/kernels/level1/xcopy.opencl b/src/kernels/level1/xcopy.opencl index 174bf0c6..493197af 100644 --- a/src/kernels/level1/xcopy.opencl +++ b/src/kernels/level1/xcopy.opencl @@ -24,7 +24,7 @@ R"( // Full version of the kernel with offsets and strided accesses #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void Xcopy(const int n, @@ -43,7 +43,7 @@ void Xcopy(const int n, // dividable by 'VW', 'WGS' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XcopyFast(const int n, diff --git a/src/kernels/level1/xdot.opencl b/src/kernels/level1/xdot.opencl index e14b6306..64f6eb9d 100644 --- a/src/kernels/level1/xdot.opencl +++ b/src/kernels/level1/xdot.opencl @@ -32,7 +32,7 @@ R"( // The main reduction kernel, performing the multiplication and the majority of the sum operation #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS1, 1, 1))) #endif void Xdot(const int n, @@ -78,7 +78,7 @@ void Xdot(const int n, // be launched with a single workgroup only. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS2, 1, 1))) #endif void XdotEpilogue(const __global real* restrict input, diff --git a/src/kernels/level1/xhad.opencl b/src/kernels/level1/xhad.opencl index aee98f91..47bb5170 100644 --- a/src/kernels/level1/xhad.opencl +++ b/src/kernels/level1/xhad.opencl @@ -68,7 +68,7 @@ INLINE_FUNC realV MultiplyVectorVector(realV cvec, const realV aval, const realV // Full version of the kernel with offsets and strided accesses #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta, @@ -96,7 +96,7 @@ void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta, // assumes that 'n' is dividable by 'VW' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta, @@ -127,7 +127,7 @@ void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta, // dividable by 'VW', 'WGS' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XhadFastest(const int n, const real_arg arg_alpha, const real_arg arg_beta, diff --git a/src/kernels/level1/xnrm2.opencl b/src/kernels/level1/xnrm2.opencl index fb45effb..36ea49b4 100644 --- a/src/kernels/level1/xnrm2.opencl +++ b/src/kernels/level1/xnrm2.opencl @@ -32,7 +32,7 @@ R"( // The main reduction kernel, performing the multiplication and the majority of the operation #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS1, 1, 1))) #endif void Xnrm2(const int n, @@ -77,7 +77,7 @@ void Xnrm2(const int n, // be launched with a single workgroup only. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS2, 1, 1))) #endif void Xnrm2Epilogue(const __global real* restrict input, diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl index 19ca9135..e4260c7c 100644 --- a/src/kernels/level1/xscal.opencl +++ b/src/kernels/level1/xscal.opencl @@ -24,7 +24,7 @@ R"( // Full version of the kernel with offsets and strided accesses #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void Xscal(const int n, const real_arg arg_alpha, @@ -46,7 +46,7 @@ void Xscal(const int n, const real_arg arg_alpha, // dividable by 'VW', 'WGS' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XscalFast(const int n, const real_arg arg_alpha, diff --git a/src/kernels/level1/xswap.opencl b/src/kernels/level1/xswap.opencl index a2b44de3..2d384423 100644 --- a/src/kernels/level1/xswap.opencl +++ b/src/kernels/level1/xswap.opencl @@ -24,7 +24,7 @@ R"( // Full version of the kernel with offsets and strided accesses #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void Xswap(const int n, @@ -45,7 +45,7 @@ void Xswap(const int n, // dividable by 'VW', 'WGS' and 'WPT'. #if RELAX_WORKGROUP_SIZE == 1 __kernel -#elif +#else __kernel __attribute__((reqd_work_group_size(WGS, 1, 1))) #endif void XswapFast(const int n, -- cgit v1.2.3