summary | refs | log | tree | commit | diff
path: root/src/kernels/common.opencl
diff options
context:
space:
mode:
author Angus, Alexander <aangus@qti.qualcomm.com> 2023-01-03 10:56:04 -0800
committer Angus, Alexander <aangus@qti.qualcomm.com> 2023-01-03 10:56:04 -0800
commit 4f394608a28f419dfd6091c704148d9e638a26f0 (patch)
tree 4c0e042109c4d249ff5b700fc49a862169edec5a /src/kernels/common.opencl
parent 03cffa83c5f7742f8ec0c5e762bb7048e38952f3 (diff)
implemented changes to boost Adreno performance according to https://jira-dc.qualcomm.com/jira/browse/OSR-8731
Diffstat (limited to 'src/kernels/common.opencl')
-rw-r--r-- src/kernels/common.opencl 6
1 files changed, 6 insertions, 0 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 56c1dae4..0ec741ad 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -132,6 +132,12 @@ R"(
#define USE_CL_MAD 0
#endif
+// The workgroup size requirement is enabled by default (RELAX_WORKGROUP_SIZE == 0). On Qualcomm
+// devices it degrades performance, so it is disabled there (see src/utilities/compile.cpp).
+#ifndef RELAX_WORKGROUP_SIZE
+ #define RELAX_WORKGROUP_SIZE 0
+#endif
+
// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464
#define SetToZero(a) a.x = ZERO; a.y = ZERO