diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-09 20:44:21 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-09 20:44:21 +0100 |
commit | 9f02fb542ca659bf58d1efefdb334ea386ef10e8 (patch) | |
tree | 8669e215f38e8b00a4ee73a436479207f2afdbf5 /src/kernels/level1 | |
parent | ca5dbcd2bd31fb0a0e3f6c2f81b3c0fff6250738 (diff) |
Completed kernel modifications for pre-processor of all other kernels
Diffstat (limited to 'src/kernels/level1')
-rw-r--r-- | src/kernels/level1/xamax.opencl | 2 | ||||
-rw-r--r-- | src/kernels/level1/xasum.opencl | 2 | ||||
-rw-r--r-- | src/kernels/level1/xcopy.opencl | 5 | ||||
-rw-r--r-- | src/kernels/level1/xnrm2.opencl | 2 | ||||
-rw-r--r-- | src/kernels/level1/xscal.opencl | 5 | ||||
-rw-r--r-- | src/kernels/level1/xswap.opencl | 5 |
6 files changed, 6 insertions, 15 deletions
```diff
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 2bd2f714..27add015 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -75,7 +75,6 @@ void Xamax(const int n,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS1/2; s>0; s=s>>1) {
     if (lid < s) {
       if (maxlm[lid + s] >= maxlm[lid]) {
@@ -117,7 +116,6 @@ void XamaxEpilogue(const __global singlereal* restrict maxgm,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       if (maxlm[lid + s] >= maxlm[lid]) {
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 1fc91be8..29e7fa3e 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -56,7 +56,6 @@ void Xasum(const int n,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS1/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
@@ -85,7 +84,6 @@ void XasumEpilogue(const __global real* restrict input,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xcopy.opencl b/src/kernels/level1/xcopy.opencl
index 228e0735..aed80fc2 100644
--- a/src/kernels/level1/xcopy.opencl
+++ b/src/kernels/level1/xcopy.opencl
@@ -28,7 +28,6 @@ void Xcopy(const int n,
            __global real* ygm, const int y_offset, const int y_inc) {
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
   for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
     ygm[id*y_inc + y_offset] = xgm[id*x_inc + x_offset];
   }
@@ -43,8 +42,8 @@ void XcopyFast(const int n,
                const __global realV* restrict xgm,
                __global realV* ygm) {
   #pragma unroll
-  for (int w=0; w<WPT; ++w) {
-    const int id = w*get_global_size(0) + get_global_id(0);
+  for (int _w = 0; _w < WPT; _w += 1) {
+    const int id = _w*get_global_size(0) + get_global_id(0);
     ygm[id] = xgm[id];
   }
 }
diff --git a/src/kernels/level1/xnrm2.opencl b/src/kernels/level1/xnrm2.opencl
index f6d869cb..6a81c150 100644
--- a/src/kernels/level1/xnrm2.opencl
+++ b/src/kernels/level1/xnrm2.opencl
@@ -54,7 +54,6 @@ void Xnrm2(const int n,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS1/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
@@ -83,7 +82,6 @@ void Xnrm2Epilogue(const __global real* restrict input,
   barrier(CLK_LOCAL_MEM_FENCE);
 
   // Performs reduction in local memory
-  #pragma unroll
   for (int s=WGS2/2; s>0; s=s>>1) {
     if (lid < s) {
       Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 3da9c2fd..cb133e88 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -28,7 +28,6 @@ void Xscal(const int n, const real_arg arg_alpha,
   const real alpha = GetRealArg(arg_alpha);
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
   for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
     real xvalue = xgm[id*x_inc + x_offset];
     real result;
@@ -47,8 +46,8 @@ void XscalFast(const int n, const real_arg arg_alpha,
   const real alpha = GetRealArg(arg_alpha);
 
   #pragma unroll
-  for (int w=0; w<WPT; ++w) {
-    const int id = w*get_global_size(0) + get_global_id(0);
+  for (int _w = 0; _w < WPT; _w += 1) {
+    const int id = _w*get_global_size(0) + get_global_id(0);
     realV xvalue = xgm[id];
     realV result;
     result = MultiplyVector(result, alpha, xvalue);
diff --git a/src/kernels/level1/xswap.opencl b/src/kernels/level1/xswap.opencl
index 267271c0..bf5b6194 100644
--- a/src/kernels/level1/xswap.opencl
+++ b/src/kernels/level1/xswap.opencl
@@ -28,7 +28,6 @@ void Xswap(const int n,
            __global real* ygm, const int y_offset, const int y_inc) {
 
   // Loops over the work that needs to be done (allows for an arbitrary number of threads)
-  #pragma unroll
   for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
     real temp = xgm[id*x_inc + x_offset];
     xgm[id*x_inc + x_offset] = ygm[id*y_inc + y_offset];
@@ -45,8 +44,8 @@ void XswapFast(const int n,
                __global realV* xgm,
                __global realV* ygm) {
   #pragma unroll
-  for (int w=0; w<WPT; ++w) {
-    const int id = w*get_global_size(0) + get_global_id(0);
+  for (int _w = 0; _w < WPT; _w += 1) {
+    const int id = _w*get_global_size(0) + get_global_id(0);
     realV temp = xgm[id];
     xgm[id] = ygm[id];
     ygm[id] = temp;
```