From 4f394608a28f419dfd6091c704148d9e638a26f0 Mon Sep 17 00:00:00 2001
From: "Angus, Alexander"
Date: Tue, 3 Jan 2023 10:56:04 -0800
Subject: implemented changes to boost Adreno performance according to https://jira-dc.qualcomm.com/jira/browse/OSR-8731

---
 src/kernels/common.opencl | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/kernels/common.opencl')

diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl
index 56c1dae4..0ec741ad 100644
--- a/src/kernels/common.opencl
+++ b/src/kernels/common.opencl
@@ -132,6 +132,12 @@ R"(
   #define USE_CL_MAD 0
 #endif
 
+// By default the workgroup size requirement is enabled. For Qualcomm devices the workgroup size
+// requirement results in worse performance and is disabled (src/utilities/compile.cpp)
+#ifndef RELAX_WORKGROUP_SIZE
+  #define RELAX_WORKGROUP_SIZE 0
+#endif
+
 // Sets a variable to zero
 #if PRECISION == 3232 || PRECISION == 6464
   #define SetToZero(a) a.x = ZERO; a.y = ZERO
-- 
cgit v1.2.3