From df9a77d74d87fb8832264e9e9a37336001873151 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 18 Jan 2017 21:29:59 +0100 Subject: Added first version of the TRSM routine based on the diagonal invert kernel --- src/kernels/level3/invert_diagonal_blocks.opencl | 13 ------------- src/kernels/level3/level3.opencl | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) (limited to 'src/kernels') diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl index 9231d725..e94b4d30 100644 --- a/src/kernels/level3/invert_diagonal_blocks.opencl +++ b/src/kernels/level3/invert_diagonal_blocks.opencl @@ -61,19 +61,6 @@ R"( // ================================================================================================= -__kernel __attribute__((reqd_work_group_size(8, 8, 1))) -void FillMatrix(const int n, const int ld, const int offset, - __global real* restrict dest, const real_arg arg_value) { - const real value = GetRealArg(arg_value); - const int id_one = get_global_id(0); - const int id_two = get_global_id(1); - if (id_one < ld && id_two < n) { - dest[id_two*ld + id_one + offset] = value; - } -} - -// ================================================================================================= - // Inverts a diagonal block of INTERNAL_BLOCK_SIZE by INTERNAL_BLOCK_SIZE elements in a larger matrix __kernel __attribute__((reqd_work_group_size(INTERNAL_BLOCK_SIZE, 1, 1))) void InvertDiagonalBlock(int n, __global const real* restrict src, const int src_offset, const int src_ld, diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl index bf14ab12..0f5a8607 100644 --- a/src/kernels/level3/level3.opencl +++ b/src/kernels/level3/level3.opencl @@ -73,6 +73,22 @@ R"( #define PADTRA_PAD 0 // Padding of the local memory to avoid bank-conflicts #endif +// ================================================================================================= +#if 
defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM) + +/* FillMatrix: stores one scalar into a 2D-indexed region of 'dest'. The scalar is GetRealArg(arg_value). The work-item with global IDs (id_one, id_two) writes dest[id_two*ld + id_one + offset], but only when id_one < ld and id_two < n; work-items outside that range do nothing. The kernel requires a work-group size of 8x8x1 and is only compiled when ROUTINE_INVERT or ROUTINE_TRSM is defined. */ __kernel __attribute__((reqd_work_group_size(8, 8, 1))) +void FillMatrix(const int n, const int ld, const int offset, + __global real* restrict dest, const real_arg arg_value) { + const real value = GetRealArg(arg_value); + const int id_one = get_global_id(0); + const int id_two = get_global_id(1); + if (id_one < ld && id_two < n) { + dest[id_two*ld + id_one + offset] = value; + } +} + +#endif + // ================================================================================================= // End of the C++11 raw string literal -- cgit v1.2.3