From 9f02fb542ca659bf58d1efefdb334ea386ef10e8 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sat, 9 Dec 2017 20:44:21 +0100
Subject: Completed kernel modifications for pre-processor of all other kernels

---
 src/kernels/level1/xamax.opencl | 2 --
 src/kernels/level1/xasum.opencl | 2 --
 src/kernels/level1/xcopy.opencl | 5 ++---
 src/kernels/level1/xnrm2.opencl | 2 --
 src/kernels/level1/xscal.opencl | 5 ++---
 src/kernels/level1/xswap.opencl | 5 ++---
 6 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'src/kernels/level1')

diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 2bd2f714..27add015 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -75,7 +75,6 @@ void Xamax(const int n,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS1/2; s>0; s=s>>1) {
     if (lid < s) {
       if (maxlm[lid + s] >= maxlm[lid]) {
@@ -117,7 +116,6 @@ void XamaxEpilogue(const __global singlereal* restrict maxgm,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       if (maxlm[lid + s] >= maxlm[lid]) {
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 1fc91be8..29e7fa3e 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -56,7 +56,6 @@ void Xasum(const int n,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS1/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
@@ -85,7 +84,6 @@ void XasumEpilogue(const __global real* restrict input,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xcopy.opencl b/src/kernels/level1/xcopy.opencl
index 228e0735..aed80fc2 100644
--- a/src/kernels/level1/xcopy.opencl
+++ 
b/src/kernels/level1/xcopy.opencl
@@ -28,7 +28,6 @@ void Xcopy(const int n,
            __global real* ygm, const int y_offset, const int y_inc) {
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
   for (int id = get_global_id(0); id<
[... text lost in extraction: the remainder of the xcopy.opencl diff and the beginning of the xnrm2.opencl diff ...]
>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
@@ -83,7 +82,6 @@ void Xnrm2Epilogue(const __global real* restrict input,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 3da9c2fd..cb133e88 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -28,7 +28,6 @@ void Xscal(const int n, const real_arg arg_alpha,
   const real alpha = GetRealArg(arg_alpha);
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
   for (int id = get_global_id(0); id