diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-31 20:09:49 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-05-31 20:09:49 +0200 |
commit | e609220393654e5400b0acd0d362367f5fe28ab8 (patch) | |
tree | abb4ed1edf5f483e4ecc89b68614b87c922e4af9 /src/kernels | |
parent | ff4d5558a6d945592cc51760628a77237cd30f67 (diff) |
Some potential fixes for error -54 (CL_INVALID_WORK_GROUP_SIZE) when launching TRSV and TRSM kernels
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level2/xtrsv.opencl | 2 |
-rw-r--r-- | src/kernels/level3/invert_diagonal_blocks_part2.opencl | 24 |
-rw-r--r-- | src/kernels/level3/level3.opencl | 2 |
3 files changed, 14 insertions, 14 deletions
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl index 8777eb77..e7b6ae79 100644 --- a/src/kernels/level2/xtrsv.opencl +++ b/src/kernels/level2/xtrsv.opencl @@ -18,7 +18,7 @@ R"( // ================================================================================================= #if defined(ROUTINE_TRSV) -__kernel __attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void FillVector(const int n, const int inc, const int offset, __global real* restrict dest, const real_arg arg_value) { const real value = GetRealArg(arg_value); diff --git a/src/kernels/level3/invert_diagonal_blocks_part2.opencl b/src/kernels/level3/invert_diagonal_blocks_part2.opencl index 8736203c..8e9b583e 100644 --- a/src/kernels/level3/invert_diagonal_blocks_part2.opencl +++ b/src/kernels/level3/invert_diagonal_blocks_part2.opencl @@ -19,7 +19,7 @@ R"( #if defined(ROUTINE_INVERT) // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -28,7 +28,7 @@ void TripleMatMul16Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -36,7 +36,7 @@ void TripleMatMul16Part2Lower(int n, __global real* restrict dest, int current_s } // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -45,7 +45,7 @@ void 
TripleMatMul32Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -53,7 +53,7 @@ void TripleMatMul32Part2Lower(int n, __global real* restrict dest, int current_s } // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -62,7 +62,7 @@ void TripleMatMul64Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -72,7 +72,7 @@ void TripleMatMul64Part2Lower(int n, __global real* restrict dest, int current_s // ================================================================================================= // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -81,7 +81,7 @@ void TripleMatMul16Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -89,7 +89,7 @@ void 
TripleMatMul16Part2Upper(int n, __global real* restrict dest, int current_s } // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -98,7 +98,7 @@ void TripleMatMul32Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -106,7 +106,7 @@ void TripleMatMul32Part2Upper(int n, __global real* restrict dest, int current_s } // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -115,7 +115,7 @@ void TripleMatMul64Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl index c67851df..bea73daf 100644 --- a/src/kernels/level3/level3.opencl +++ b/src/kernels/level3/level3.opencl @@ -76,7 +76,7 @@ R"( // ================================================================================================= #if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM) -__kernel __attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void FillMatrix(const int m, const int n, const int ld, const int 
offset, __global real* restrict dest, const real_arg arg_value) { const real value = GetRealArg(arg_value); |