summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-01-18 21:29:59 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-01-18 21:29:59 +0100
commit: df9a77d74d87fb8832264e9e9a37336001873151 (patch)
tree: 516e113140164daa0d918803dee64b94b685afb6 /src/kernels
parent: 4b3ffd998904f5c848edc5917308f5942fa71da3 (diff)
Added first version of the TRSM routine based on the diagonal invert kernel
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level3/invert_diagonal_blocks.opencl 13
-rw-r--r-- src/kernels/level3/level3.opencl 16
2 files changed, 16 insertions, 13 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl
index 9231d725..e94b4d30 100644
--- a/src/kernels/level3/invert_diagonal_blocks.opencl
+++ b/src/kernels/level3/invert_diagonal_blocks.opencl
@@ -61,19 +61,6 @@ R"(
// =================================================================================================
-__kernel __attribute__((reqd_work_group_size(8, 8, 1)))
-void FillMatrix(const int n, const int ld, const int offset,
- __global real* restrict dest, const real_arg arg_value) {
- const real value = GetRealArg(arg_value);
- const int id_one = get_global_id(0);
- const int id_two = get_global_id(1);
- if (id_one < ld && id_two < n) {
- dest[id_two*ld + id_one + offset] = value;
- }
-}
-
-// =================================================================================================
-
// Inverts a diagonal block of INTERNAL_BLOCK_SIZE by INTERNAL_BLOCK_SIZE elements in a larger matrix
__kernel __attribute__((reqd_work_group_size(INTERNAL_BLOCK_SIZE, 1, 1)))
void InvertDiagonalBlock(int n, __global const real* restrict src, const int src_offset, const int src_ld,
diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl
index bf14ab12..0f5a8607 100644
--- a/src/kernels/level3/level3.opencl
+++ b/src/kernels/level3/level3.opencl
@@ -74,6 +74,22 @@ R"(
#endif
// =================================================================================================
+#if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM)
+
+__kernel __attribute__((reqd_work_group_size(8, 8, 1))) // Fills an ld-by-n range of 'dest' with a single value; the attribute requires 8x8x1 work-groups
+void FillMatrix(const int n, const int ld, const int offset,
+ __global real* restrict dest, const real_arg arg_value) { // 'offset' is added to every index written into 'dest'
+ const real value = GetRealArg(arg_value); // GetRealArg is defined outside this diff; presumably unpacks the host argument into 'real' - confirm
+ const int id_one = get_global_id(0); // Position within one stride of 'ld' elements
+ const int id_two = get_global_id(1); // Which 'ld'-sized stride is written; bounded by 'n'
+ if (id_one < ld && id_two < n) { // Work-items outside the ld-by-n range write nothing
+ dest[id_two*ld + id_one + offset] = value; // Consecutive id_one values map to consecutive elements
+ }
+}
+
+#endif
+
+// =================================================================================================
// End of the C++11 raw string literal
)"