summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: etomzak <etomzak@users.noreply.github.com> 2019-08-26 17:41:07 +0100
committer: etomzak <etomzak@users.noreply.github.com> 2019-09-04 12:55:25 +0100
commit: 9560193a9e4de27889e3980c8353a050ff2e00da (patch)
tree: edb452a4e5b42ff6a8a3ad1c45925fe5d4b8bab4 /src
parent: ec501055f93f6f46fa0d0ea1208eeda0e9190518 (diff)
Fix out-of-bounds read/write in XhadFaster
Fix an error in XhadFaster where data would be written beyond the end of zgm. The kernel loop assumed that there was always enough work for each thread to process WPT items, but this was not enforced. It's possible to detect the overflow with the "canary" buffer regions, but for SHAD, kCanarySize must be ~500 (much larger than the normal 127). This commit may improve the performance of XhadFaster, since the kernel was performing 2x work in some cases (once over real data, once over garbage). Courtesy of Codeplay Software Ltd.
Diffstat (limited to 'src')
-rw-r--r-- src/kernels/level1/xhad.opencl 6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/kernels/level1/xhad.opencl b/src/kernels/level1/xhad.opencl
index 3880b7a4..24e0c76c 100644
--- a/src/kernels/level1/xhad.opencl
+++ b/src/kernels/level1/xhad.opencl
@@ -97,10 +97,12 @@ void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta,
const real alpha = GetRealArg(arg_alpha);
const real beta = GetRealArg(arg_beta);
- if (get_global_id(0) < n / (VW)) {
+ const int num_desired_threads = n / (VW * WPT);
+
+ if (get_global_id(0) < num_desired_threads) {
#pragma unroll
for (int _w = 0; _w < WPT; _w += 1) {
- const int id = _w*get_global_size(0) + get_global_id(0);
+ const int id = _w * num_desired_threads + get_global_id(0);
realV xvalue = xgm[id];
realV yvalue = ygm[id];
realV zvalue = zgm[id];