From 9560193a9e4de27889e3980c8353a050ff2e00da Mon Sep 17 00:00:00 2001
From: etomzak
Date: Mon, 26 Aug 2019 17:41:07 +0100
Subject: Fix out-of-bounds read/write in XhadFaster

Fix an error in XhadFaster where data would be written beyond the end of
zgm. The kernel loop assumed that there was always enough work for each
thread to process WPT items, but this was not enforced.

It's possible to detect the overflow with the "canary" buffer regions,
but for SHAD, kCanarySize must be ~500 (much larger than the normal 127).

This commit may improve the performance of XhadFaster, since the kernel
was performing 2x work in some cases (once over real data, once over
garbage).

Courtesy of Codeplay Software Ltd.
---
 src/kernels/level1/xhad.opencl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/kernels/level1/xhad.opencl b/src/kernels/level1/xhad.opencl
index 3880b7a4..24e0c76c 100644
--- a/src/kernels/level1/xhad.opencl
+++ b/src/kernels/level1/xhad.opencl
@@ -97,10 +97,12 @@ void XhadFaster(const int n, const real_arg arg_alpha, const real_arg arg_beta,
   const real alpha = GetRealArg(arg_alpha);
   const real beta = GetRealArg(arg_beta);
 
-  if (get_global_id(0) < n / (VW)) {
+  const int num_desired_threads = n / (VW * WPT);
+
+  if (get_global_id(0) < num_desired_threads) {
     #pragma unroll
     for (int _w = 0; _w < WPT; _w += 1) {
-      const int id = _w*get_global_size(0) + get_global_id(0);
+      const int id = _w * num_desired_threads + get_global_id(0);
       realV xvalue = xgm[id];
       realV yvalue = ygm[id];
       realV zvalue = zgm[id];
-- 
cgit v1.2.3