summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-06-08 10:13:37 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-06-08 10:13:37 +0200
commit 6925003e45e5c681aaeb26c95ffa29275cebcaac (patch)
tree ef088f43ab7aeec82ecfe85cec9c145ef1d46ff0 /include
parent 6d6b0300532a48fe9f638898b630891d38173538 (diff)
Added global memory synchronisation for better cache performance on ARM Mali GPUs
Diffstat (limited to 'include')
-rw-r--r-- include/internal/tuning.h | 5
1 files changed, 5 insertions, 0 deletions
diff --git a/include/internal/tuning.h b/include/internal/tuning.h
index 3eba6fdb..8fc79aff 100644
--- a/include/internal/tuning.h
+++ b/include/internal/tuning.h
@@ -52,6 +52,7 @@ void Tuner(int argc, char* argv[]) {
// Tests for validity of the precision and retrieves properties
auto isAMD = false;
+ auto isARM = false;
auto isGPU = false;
{
const auto platform = Platform(args.platform_id);
@@ -61,6 +62,7 @@ void Tuner(int argc, char* argv[]) {
return;
}
isAMD = device.Vendor() == "AMD" || device.Vendor() == "Advanced Micro Devices, Inc.";
+ isARM = device.Vendor() == "ARM";
isGPU = device.Type() == "GPU";
}
@@ -96,6 +98,9 @@ void Tuner(int argc, char* argv[]) {
defines += "#define USE_CL_MAD 1\n";
defines += "#define USE_STAGGERED_INDICES 1\n";
}
+ if (isARM && isGPU) {
+ defines += "#define GLOBAL_MEM_FENCE 1\n";
+ }
// Loads the kernel sources and defines the kernel to tune
auto sources = defines + C::GetSources();