summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_part2.opencl
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-06-08 10:13:37 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-06-08 10:13:37 +0200
commit 6925003e45e5c681aaeb26c95ffa29275cebcaac (patch)
tree ef088f43ab7aeec82ecfe85cec9c145ef1d46ff0 /src/kernels/level3/xgemm_part2.opencl
parent 6d6b0300532a48fe9f638898b630891d38173538 (diff)
Added global memory synchronisation for better cache performance on ARM Mali GPUs
Diffstat (limited to 'src/kernels/level3/xgemm_part2.opencl')
-rw-r--r-- src/kernels/level3/xgemm_part2.opencl 3
1 files changed, 3 insertions, 0 deletions
diff --git a/src/kernels/level3/xgemm_part2.opencl b/src/kernels/level3/xgemm_part2.opencl
index 56ccdb96..42c1127c 100644
--- a/src/kernels/level3/xgemm_part2.opencl
+++ b/src/kernels/level3/xgemm_part2.opencl
@@ -258,6 +258,9 @@ inline void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
barrier(CLK_LOCAL_MEM_FENCE);
#endif
}
+ #if GLOBAL_MEM_FENCE == 1
+ barrier(CLK_GLOBAL_MEM_FENCE);
+ #endif
}
// =================================================================================================