summary | refs | log | tree | commit | diff
path: root/src/kernels/level1
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 20:44:21 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 20:44:21 +0100
commit: 9f02fb542ca659bf58d1efefdb334ea386ef10e8 (patch)
tree: 8669e215f38e8b00a4ee73a436479207f2afdbf5 /src/kernels/level1
parent: ca5dbcd2bd31fb0a0e3f6c2f81b3c0fff6250738 (diff)
Completed kernel modifications for pre-processor of all other kernels
Diffstat (limited to 'src/kernels/level1')
-rw-r--r-- src/kernels/level1/xamax.opencl 2
-rw-r--r-- src/kernels/level1/xasum.opencl 2
-rw-r--r-- src/kernels/level1/xcopy.opencl 5
-rw-r--r-- src/kernels/level1/xnrm2.opencl 2
-rw-r--r-- src/kernels/level1/xscal.opencl 5
-rw-r--r-- src/kernels/level1/xswap.opencl 5
6 files changed, 6 insertions, 15 deletions
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 2bd2f714..27add015 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -75,7 +75,6 @@ void Xamax(const int n,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS1/2; s>0; s=s>>1) {
if (lid < s) {
if (maxlm[lid + s] >= maxlm[lid]) {
@@ -117,7 +116,6 @@ void XamaxEpilogue(const __global singlereal* restrict maxgm,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS2/2; s>0; s=s>>1) {
if (lid < s) {
if (maxlm[lid + s] >= maxlm[lid]) {
diff --git a/src/kernels/level1/xasum.opencl b/src/kernels/level1/xasum.opencl
index 1fc91be8..29e7fa3e 100644
--- a/src/kernels/level1/xasum.opencl
+++ b/src/kernels/level1/xasum.opencl
@@ -56,7 +56,6 @@ void Xasum(const int n,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS1/2; s>0; s=s>>1) {
if (lid < s) {
Add(lm[lid], lm[lid], lm[lid + s]);
@@ -85,7 +84,6 @@ void XasumEpilogue(const __global real* restrict input,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS2/2; s>0; s=s>>1) {
if (lid < s) {
Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xcopy.opencl b/src/kernels/level1/xcopy.opencl
index 228e0735..aed80fc2 100644
--- a/src/kernels/level1/xcopy.opencl
+++ b/src/kernels/level1/xcopy.opencl
@@ -28,7 +28,6 @@ void Xcopy(const int n,
__global real* ygm, const int y_offset, const int y_inc) {
// Loops over the work that needs to be done (allows for an arbitrary number of threads)
- #pragma unroll
for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
ygm[id*y_inc + y_offset] = xgm[id*x_inc + x_offset];
}
@@ -43,8 +42,8 @@ void XcopyFast(const int n,
const __global realV* restrict xgm,
__global realV* ygm) {
#pragma unroll
- for (int w=0; w<WPT; ++w) {
- const int id = w*get_global_size(0) + get_global_id(0);
+ for (int _w = 0; _w < WPT; _w += 1) {
+ const int id = _w*get_global_size(0) + get_global_id(0);
ygm[id] = xgm[id];
}
}
diff --git a/src/kernels/level1/xnrm2.opencl b/src/kernels/level1/xnrm2.opencl
index f6d869cb..6a81c150 100644
--- a/src/kernels/level1/xnrm2.opencl
+++ b/src/kernels/level1/xnrm2.opencl
@@ -54,7 +54,6 @@ void Xnrm2(const int n,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS1/2; s>0; s=s>>1) {
if (lid < s) {
Add(lm[lid], lm[lid], lm[lid + s]);
@@ -83,7 +82,6 @@ void Xnrm2Epilogue(const __global real* restrict input,
barrier(CLK_LOCAL_MEM_FENCE);
// Performs reduction in local memory
- #pragma unroll
for (int s=WGS2/2; s>0; s=s>>1) {
if (lid < s) {
Add(lm[lid], lm[lid], lm[lid + s]);
diff --git a/src/kernels/level1/xscal.opencl b/src/kernels/level1/xscal.opencl
index 3da9c2fd..cb133e88 100644
--- a/src/kernels/level1/xscal.opencl
+++ b/src/kernels/level1/xscal.opencl
@@ -28,7 +28,6 @@ void Xscal(const int n, const real_arg arg_alpha,
const real alpha = GetRealArg(arg_alpha);
// Loops over the work that needs to be done (allows for an arbitrary number of threads)
- #pragma unroll
for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
real xvalue = xgm[id*x_inc + x_offset];
real result;
@@ -47,8 +46,8 @@ void XscalFast(const int n, const real_arg arg_alpha,
const real alpha = GetRealArg(arg_alpha);
#pragma unroll
- for (int w=0; w<WPT; ++w) {
- const int id = w*get_global_size(0) + get_global_id(0);
+ for (int _w = 0; _w < WPT; _w += 1) {
+ const int id = _w*get_global_size(0) + get_global_id(0);
realV xvalue = xgm[id];
realV result;
result = MultiplyVector(result, alpha, xvalue);
diff --git a/src/kernels/level1/xswap.opencl b/src/kernels/level1/xswap.opencl
index 267271c0..bf5b6194 100644
--- a/src/kernels/level1/xswap.opencl
+++ b/src/kernels/level1/xswap.opencl
@@ -28,7 +28,6 @@ void Xswap(const int n,
__global real* ygm, const int y_offset, const int y_inc) {
// Loops over the work that needs to be done (allows for an arbitrary number of threads)
- #pragma unroll
for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
real temp = xgm[id*x_inc + x_offset];
xgm[id*x_inc + x_offset] = ygm[id*y_inc + y_offset];
@@ -45,8 +44,8 @@ void XswapFast(const int n,
__global realV* xgm,
__global realV* ygm) {
#pragma unroll
- for (int w=0; w<WPT; ++w) {
- const int id = w*get_global_size(0) + get_global_id(0);
+ for (int _w = 0; _w < WPT; _w += 1) {
+ const int id = _w*get_global_size(0) + get_global_id(0);
realV temp = xgm[id];
xgm[id] = ygm[id];
ygm[id] = temp;