summaryrefslogtreecommitdiff
path: root/src/kernels/level2
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-07-08 17:12:16 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2017-07-08 17:12:16 +0200
commit442c31dd508c573023594a803160ddb69d4929f2 (patch)
tree55474d09086481117204626b27cbec4ee465be9a /src/kernels/level2
parent75c0e861b842dbd08def5e55696fd79d713afc96 (diff)
Made the inline keyword in kernels optional currently only enabled for NVIDIA and ARM GPUs
Diffstat (limited to 'src/kernels/level2')
-rw-r--r--src/kernels/level2/level2.opencl24
-rw-r--r--src/kernels/level2/xgemv.opencl6
-rw-r--r--src/kernels/level2/xgemv_fast.opencl4
3 files changed, 17 insertions, 17 deletions
diff --git a/src/kernels/level2/level2.opencl b/src/kernels/level2/level2.opencl
index be979766..505231ca 100644
--- a/src/kernels/level2/level2.opencl
+++ b/src/kernels/level2/level2.opencl
@@ -33,9 +33,9 @@ R"(
// =================================================================================================
// Returns an element from a vector
-inline real LoadVector(const int id, const int max,
- __global real* gm, const int offset, const int inc,
- const int do_conjugate) {
+INLINE_FUNC real LoadVector(const int id, const int max,
+ __global real* gm, const int offset, const int inc,
+ const int do_conjugate) {
if (id < max) {
real result = gm[id*inc + offset];
if (do_conjugate) {
@@ -53,10 +53,10 @@ inline real LoadVector(const int id, const int max,
}
// Performs the rank-1 matrix update
-inline void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
- __global real* agm, const int a_offset, const int a_ld,
- const real alpha, const real xvalue, const real yvalue,
- const int is_upper) {
+INLINE_FUNC void MatrixUpdate(const int id1, const int id2, const int max1, const int max2,
+ __global real* agm, const int a_offset, const int a_ld,
+ const real alpha, const real xvalue, const real yvalue,
+ const int is_upper) {
// Bounds of a regular matrix
if (id1 < max1 && id2 < max2) {
@@ -100,11 +100,11 @@ inline void MatrixUpdate(const int id1, const int id2, const int max1, const int
}
// Performs the rank-2 matrix update
-inline void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2,
- __global real* agm, const int a_offset, const int a_ld,
- const real alpha1, const real xvalue, const real yvalue,
- const real alpha2, const real xtvalue, const real ytvalue,
- const int is_upper) {
+INLINE_FUNC void MatrixUpdate2(const int id1, const int id2, const int max1, const int max2,
+ __global real* agm, const int a_offset, const int a_ld,
+ const real alpha1, const real xvalue, const real yvalue,
+ const real alpha2, const real xtvalue, const real ytvalue,
+ const int is_upper) {
// Bounds of a regular matrix
if (id1 < max1 && id2 < max2) {
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl
index ff011acd..ea0478f0 100644
--- a/src/kernels/level2/xgemv.opencl
+++ b/src/kernels/level2/xgemv.opencl
@@ -36,9 +36,9 @@ R"(
// =================================================================================================
// Defines how to load the input matrix in the non-vectorized case
-inline real LoadMatrixA(const __global real* restrict agm, const int x, const int y,
- const int a_ld, const int a_offset, const int parameter,
- const int kl, const int ku) {
+INLINE_FUNC real LoadMatrixA(const __global real* restrict agm, const int x, const int y,
+ const int a_ld, const int a_offset, const int parameter,
+ const int kl, const int ku) {
real result;
// For banded matrices
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 02a1f956..8a08f076 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -75,8 +75,8 @@ R"(
// =================================================================================================
// Loads a vector input value
-inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y,
- const int a_ld) {
+INLINE_FUNC realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, const int y,
+ const int a_ld) {
return agm[a_ld*y + x];
}