Made the inline keyword in kernels optional, currently only enabled for NVIDIA and ARM GPUs

author: Cedric Nugteren <web@cedricnugteren.nl> 2017-07-08 17:12:16 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-07-08 17:12:16 +0200
commit: 442c31dd508c573023594a803160ddb69d4929f2 (patch)
tree: 55474d09086481117204626b27cbec4ee465be9a /src/kernels/level2
parent: 75c0e861b842dbd08def5e55696fd79d713afc96 (diff)
3 files changed, 17 insertions, 17 deletions
diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl
index be979766..505231ca 100644
--- a/src/kernels/level2/level2.opencl
+++ b/src/kernels/level2/level2.opencl
@@ -33,9 +33,9 @@ R"(
 // =================================================================================================
 
 // Returns an element from a vector
-inline real LoadVector(const int id, const int max,
-                       __global real* gm, const int offset, const int inc,
-                       const int do_conjugate) {
+INLINE_FUNC real LoadVector(const int id, const int max,
+                            __global real* gm, const int offset, const int inc,
+                            const int do_conjugate) {
   if (id < max) {
     real result = gm[id*inc + offset];
     if (do_conjugate) {
@@ -53,10 +53,10 @@ inline real LoadVector(const int id, const int max,
 }
 
 // Performs the rank-1 matrix update
-inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
-                         __global real* agm, const int a_offset, const int a_ld,
-                         const real alpha, const real xvalue, const real yvalue,
-                         const int is_upper) {
+INLINE_FUNC void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
+                              __global real* agm, const int a_offset, const int a_ld,
+                              const real alpha, const real xvalue, const real yvalue,
+                              const int is_upper) {
 
   // Bounds of a regular matrix
   if (id1 < max1 && id2 < max2) {
@@ -100,11 +100,11 @@ inline void MatrixUpdate(const int id1, const int id2, const int max1, const int
 }
 
 // Performs the rank-2 matrix update
-inline void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2,
-                          __global real* agm, const int a_offset, const int a_ld,
-                          const real alpha1, const real xvalue, const real yvalue,
-                          const real alpha2, const real xtvalue, const real ytvalue,
-                          const int is_upper) {
+INLINE_FUNC void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2,
+                               __global real* agm, const int a_offset, const int a_ld,
+                               const real alpha1, const real xvalue, const real yvalue,
+                               const real alpha2, const real xtvalue, const real ytvalue,
+                               const int is_upper) {
 
   // Bounds of a regular matrix
   if (id1 < max1 && id2 < max2) {
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl
index ff011acd..ea0478f0 100644
--- a/src/kernels/level2/xgemv.opencl
+++ b/src/kernels/level2/xgemv.opencl
@@ -36,9 +36,9 @@ R"(
 // =================================================================================================
 
 // Defines how to load the input matrix in the non-vectorized case
-inline real LoadMatrixA(const __global real* restrict agm, const int x, const int y,
-                        const int a_ld, const int a_offset, const int parameter,
-                        const int kl, const int ku) {
+INLINE_FUNC real LoadMatrixA(const __global real* restrict agm, const int x, const int y,
+                             const int a_ld, const int a_offset, const int parameter,
+                             const int kl, const int ku) {
   real result;
 
   // For banded matrices
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 02a1f956..8a08f076 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -75,8 +75,8 @@ R"(
 // =================================================================================================
 
 // Loads a vector input value
-inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y,
-                            const int a_ld) {
+INLINE_FUNC realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y,
+                                 const int a_ld) {
   return agm[a_ld*y + x];
 }
author	Cedric Nugteren <web@cedricnugteren.nl>	2017-07-08 17:12:16 +0200
committer	Cedric Nugteren <web@cedricnugteren.nl>	2017-07-08 17:12:16 +0200
commit	442c31dd508c573023594a803160ddb69d4929f2 (patch)
tree	55474d09086481117204626b27cbec4ee465be9a /src/kernels/level2
parent	75c0e861b842dbd08def5e55696fd79d713afc96 (diff)