summary | refs | log | tree | commit | diff
path: root/src/kernels/common.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2023-01-21 20:28:32 +0100
committer: GitHub <noreply@github.com> 2023-01-21 20:28:32 +0100
commit: e72f87ae5eca5e2ea8aea4f2ce49408c1faa0521 (patch)
tree: 588a426b6350a5c982d89d98749ae78667fd23b4 /src/kernels/common.opencl
parent: 03cffa83c5f7742f8ec0c5e762bb7048e38952f3 (diff)
parent: 73f49e9b3d4abc4214122e4b8c07a736e01626ee (diff)
Merge pull request #451 from CodeLinaro/master
CLBlast modifications to address Qualcomm Adreno performance
Diffstat (limited to 'src/kernels/common.opencl')
-rw-r--r-- src/kernels/common.opencl 6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 56c1dae4..0ec741ad 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -132,6 +132,12 @@ R"(
#define USE_CL_MAD 0
#endif
+// By default the workgroup size requirement is enabled. For Qualcomm devices the workgroup size
+// requirement results in worse performance and is disabled (src/utilities/compile.cpp)
+#ifndef RELAX_WORKGROUP_SIZE
+ #define RELAX_WORKGROUP_SIZE 0
+#endif
+
// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464
#define SetToZero(a) a.x = ZERO; a.y = ZERO