diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-06-03 15:53:27 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-06-03 15:53:27 +0200 |
commit | 1c9a74147073234da953b84f0bbafefbcf5ffb4f (patch) | |
tree | b29bae73160430bad665b6a947b0de9e2f6cdd46 /src/kernels | |
parent | 838422fbb1a8fa7ce2cad06bb94b2779d3929e08 (diff) | |
parent | 4471b67735fecc8089df638cc06c2d5bd3cd3d2c (diff) |
Merge branch 'master' into CLBlast-267-convgemm
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level2/xtrsv.opencl | 2 | ||||
-rw-r--r-- | src/kernels/level3/invert_diagonal_blocks_part2.opencl | 24 | ||||
-rw-r--r-- | src/kernels/level3/level3.opencl | 2 |
3 files changed, 14 insertions, 14 deletions
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl index 8777eb77..e7b6ae79 100644 --- a/src/kernels/level2/xtrsv.opencl +++ b/src/kernels/level2/xtrsv.opencl @@ -18,7 +18,7 @@ R"( // ================================================================================================= #if defined(ROUTINE_TRSV) -__kernel __attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void FillVector(const int n, const int inc, const int offset, __global real* restrict dest, const real_arg arg_value) { const real value = GetRealArg(arg_value); diff --git a/src/kernels/level3/invert_diagonal_blocks_part2.opencl b/src/kernels/level3/invert_diagonal_blocks_part2.opencl index 8736203c..8e9b583e 100644 --- a/src/kernels/level3/invert_diagonal_blocks_part2.opencl +++ b/src/kernels/level3/invert_diagonal_blocks_part2.opencl @@ -19,7 +19,7 @@ R"( #if defined(ROUTINE_INVERT) // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -28,7 +28,7 @@ void TripleMatMul16Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -36,7 +36,7 @@ void TripleMatMul16Part2Lower(int n, __global real* restrict dest, int current_s } // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -45,7 +45,7 @@ void TripleMatMul32Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -53,7 +53,7 @@ void TripleMatMul32Part2Lower(int n, __global real* restrict dest, int current_s } // B21 = A21 * B11 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part1Lower(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -62,7 +62,7 @@ void TripleMatMul64Part1Lower(int n, __global const real* restrict src, const in } // B21 = -B22 * B21 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part2Lower(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -72,7 +72,7 @@ void TripleMatMul64Part2Lower(int n, __global real* restrict dest, int current_s // ================================================================================================= // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -81,7 +81,7 @@ void TripleMatMul16Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(1 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul16Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -89,7 +89,7 @@ void TripleMatMul16Part2Upper(int n, __global real* restrict dest, int current_s } // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -98,7 +98,7 @@ void TripleMatMul32Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(2 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul32Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; @@ -106,7 +106,7 @@ void TripleMatMul32Part2Upper(int n, __global real* restrict dest, int current_s } // B12 = A12 * B22 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part1Upper(int n, __global const real* restrict src, const int a_offset, const int lda, __global real* restrict dest, int current_size, int num_pages, const int block_size) { @@ -115,7 +115,7 @@ void TripleMatMul64Part1Upper(int n, __global const real* restrict src, const in } // B12 = -B11 * B12 -__kernel __attribute__((reqd_work_group_size(4 * TMMWGSX, TMMWGSY, 1))) +__kernel void TripleMatMul64Part2Upper(int n, __global real* restrict dest, int current_size, int num_pages, const int block_size) { __local real lm[LOCALY * LOCALX]; diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl index c67851df..bea73daf 100644 --- a/src/kernels/level3/level3.opencl +++ b/src/kernels/level3/level3.opencl @@ -76,7 +76,7 @@ R"( // ================================================================================================= #if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM) -__kernel __attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void FillMatrix(const int m, const int n, const int ld, const int offset, __global real* restrict dest, const real_arg arg_value) { const real value = GetRealArg(arg_value); |