summary | refs | log | tree | commit | diff
path: root/src/kernels/level1
diff options
context:
space:
mode:
author Angus, Alexander <aangus@qti.qualcomm.com> 2023-01-17 08:35:29 -0800
committer Angus, Alexander <aangus@qti.qualcomm.com> 2023-01-17 08:35:29 -0800
commit 73f49e9b3d4abc4214122e4b8c07a736e01626ee (patch)
tree 588a426b6350a5c982d89d98749ae78667fd23b4 /src/kernels/level1
parent ff6a5689dff31ed3c1f1906a6b425252fd60a9ee (diff)
Updated according to feedback from CNugteren
Diffstat (limited to 'src/kernels/level1')
-rw-r--r-- src/kernels/level1/xamax.opencl 4
-rw-r--r-- src/kernels/level1/xasum.opencl 4
-rw-r--r-- src/kernels/level1/xaxpy.opencl 8
-rw-r--r-- src/kernels/level1/xcopy.opencl 4
-rw-r--r-- src/kernels/level1/xdot.opencl 4
-rw-r--r-- src/kernels/level1/xhad.opencl 6
-rw-r--r-- src/kernels/level1/xnrm2.opencl 4
-rw-r--r-- src/kernels/level1/xscal.opencl 4
-rw-r--r-- src/kernels/level1/xswap.opencl 4
9 files changed, 21 insertions, 21 deletions
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 3600b9d2..06a6773b 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -32,7 +32,7 @@ R"(
// The main reduction kernel, performing the loading and the majority of the operation
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1)))
#endif
void Xamax(const int n,
@@ -102,7 +102,7 @@ void Xamax(const int n,
// be launched with a single workgroup only.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1)))
#endif
void XamaxEpilogue(const __global singlereal* restrict maxgm,
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 875221f4..683c6fad 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -32,7 +32,7 @@ R"(
// The main reduction kernel, performing the loading and the majority of the operation
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1)))
#endif
void Xasum(const int n,
@@ -79,7 +79,7 @@ void Xasum(const int n,
// be launched with a single workgroup only.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1)))
#endif
void XasumEpilogue(const __global real* restrict input,
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index b20ad200..a106ed01 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -24,7 +24,7 @@ R"(
// Full version of the kernel with offsets and strided accesses
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void Xaxpy(const int n, const real_arg arg_alpha,
@@ -43,7 +43,7 @@ void Xaxpy(const int n, const real_arg arg_alpha,
// assumes that 'n' is dividable by 'VW' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XaxpyFaster(const int n, const real_arg arg_alpha,
@@ -67,7 +67,7 @@ void XaxpyFaster(const int n, const real_arg arg_alpha,
// dividable by 'VW', 'WGS' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XaxpyFastest(const int n, const real_arg arg_alpha,
@@ -89,7 +89,7 @@ void XaxpyFastest(const int n, const real_arg arg_alpha,
// Full version of the kernel with offsets and strided accesses: batched version
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XaxpyBatched(const int n, const __constant real_arg* arg_alphas,
diff --git a/src/kernels/level1/xcopy.opencl b/src/kernels/level1/xcopy.opencl
index 174bf0c6..493197af 100644
--- a/src/kernels/level1/xcopy.opencl
+++ b/src/kernels/level1/xcopy.opencl
@@ -24,7 +24,7 @@ R"(
// Full version of the kernel with offsets and strided accesses
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void Xcopy(const int n,
@@ -43,7 +43,7 @@ void Xcopy(const int n,
// dividable by 'VW', 'WGS' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XcopyFast(const int n,
diff --git a/src/kernels/level1/xdot.opencl b/src/kernels/level1/xdot.opencl
index e14b6306..64f6eb9d 100644
--- a/src/kernels/level1/xdot.opencl
+++ b/src/kernels/level1/xdot.opencl
@@ -32,7 +32,7 @@ R"(
// The main reduction kernel, performing the multiplication and the majority of the sum operation
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1)))
#endif
void Xdot(const int n,
@@ -78,7 +78,7 @@ void Xdot(const int n,
// be launched with a single workgroup only.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1)))
#endif
void XdotEpilogue(const __global real* restrict input,
diff --git a/src/kernels/level1/xhad.opencl b/src/kernels/level1/xhad.opencl
index aee98f91..47bb5170 100644
--- a/src/kernels/level1/xhad.opencl
+++ b/src/kernels/level1/xhad.opencl
@@ -68,7 +68,7 @@ INLINE_FUNC realV MultiplyVectorVector(realV cvec, const realV aval, const realV
// Full version of the kernel with offsets and strided accesses
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta,
@@ -96,7 +96,7 @@ void Xhad(const int n, const real_arg arg_alpha, const real_arg arg_beta,
// assumes that 'n' is dividable by 'VW' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta,
@@ -127,7 +127,7 @@ void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta,
// dividable by 'VW', 'WGS' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XhadFastest(const int n, const real_arg arg_alpha, const real_arg arg_beta,
diff --git a/src/kernels/level1/xnrm2.opencl b/src/kernels/level1/xnrm2.opencl
index fb45effb..36ea49b4 100644
--- a/src/kernels/level1/xnrm2.opencl
+++ b/src/kernels/level1/xnrm2.opencl
@@ -32,7 +32,7 @@ R"(
// The main reduction kernel, performing the multiplication and the majority of the operation
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1)))
#endif
void Xnrm2(const int n,
@@ -77,7 +77,7 @@ void Xnrm2(const int n,
// be launched with a single workgroup only.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1)))
#endif
void Xnrm2Epilogue(const __global real* restrict input,
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 19ca9135..e4260c7c 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -24,7 +24,7 @@ R"(
// Full version of the kernel with offsets and strided accesses
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void Xscal(const int n, const real_arg arg_alpha,
@@ -46,7 +46,7 @@ void Xscal(const int n, const real_arg arg_alpha,
// dividable by 'VW', 'WGS' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XscalFast(const int n, const real_arg arg_alpha,
diff --git a/src/kernels/level1/xswap.opencl b/src/kernels/level1/xswap.opencl
index a2b44de3..2d384423 100644
--- a/src/kernels/level1/xswap.opencl
+++ b/src/kernels/level1/xswap.opencl
@@ -24,7 +24,7 @@ R"(
// Full version of the kernel with offsets and strided accesses
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void Xswap(const int n,
@@ -45,7 +45,7 @@ void Xswap(const int n,
// dividable by 'VW', 'WGS' and 'WPT'.
#if RELAX_WORKGROUP_SIZE == 1
__kernel
-#elif
+#else
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
#endif
void XswapFast(const int n,