summaryrefslogtreecommitdiff
path: root/src/kernels/level3/invert_diagonal_blocks.opencl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/level3/invert_diagonal_blocks.opencl')
-rw-r--r--src/kernels/level3/invert_diagonal_blocks.opencl18
1 files changed, 9 insertions, 9 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl
index 874c1510..93241700 100644
--- a/src/kernels/level3/invert_diagonal_blocks.opencl
+++ b/src/kernels/level3/invert_diagonal_blocks.opencl
@@ -164,10 +164,10 @@ void InvertDiagonalBlock(int n, __global const real* restrict src, const int src
// =================================================================================================
// Triple matrix-multiplication kernel: C = A * B
-inline void TripleMatMul(const int size, const bool upper, const int part, __local real* blm, int n,
- __global const real* agm, __global const real* bgm, __global real* cgm,
- const int lda, const int ldb, const int ldc,
- int current_size, int num_pages, const int block_size) {
+INLINE_FUNC void TripleMatMul(const int size, const bool upper, const int part, __local real* blm, int n,
+ __global const real* agm, __global const real* bgm, __global real* cgm,
+ const int lda, const int ldb, const int ldc,
+ int current_size, int num_pages, const int block_size) {
// Emulates a 3D grid: NX * (NY * num_pages)
const int by = get_group_id(1) / num_pages;
@@ -250,9 +250,9 @@ inline void TripleMatMul(const int size, const bool upper, const int part, __loc
// =================================================================================================
// Triple matrix-multiplication kernel part 1: B12 = A12 * B22 (upper) or B21 = A21 * B11 (lower)
-inline void TripleMatMulPart1(const int size, const bool upper, __local real* blm, int n,
- __global const real* src, const int a_offset, const int lda,
- __global real* dest, int current_size, int num_pages, const int block_size) {
+INLINE_FUNC void TripleMatMulPart1(const int size, const bool upper, __local real* blm, int n,
+ __global const real* src, const int a_offset, const int lda,
+ __global real* dest, int current_size, int num_pages, const int block_size) {
// Emulates a 3D grid: NX * (NY * num_pages)
const int page = get_group_id(1) % num_pages;
@@ -286,8 +286,8 @@ inline void TripleMatMulPart1(const int size, const bool upper, __local real* bl
}
// Triple matrix-multiplication kernel part 2: B12 = -B11 * B12 (upper) or B21 = -B22 * B21 (lower)
-inline void TripleMatMulPart2(const int size, const bool upper, __local real* blm, const int n,
- __global real* dest, int current_size, int num_pages, const int block_size) {
+INLINE_FUNC void TripleMatMulPart2(const int size, const bool upper, __local real* blm, const int n,
+ __global real* dest, int current_size, int num_pages, const int block_size) {
// Emulates a 3D grid: NX * (NY * num_pages)
const int page = get_group_id(1) % num_pages;