diff options
-rw-r--r-- | src/kernels/level3/level3.opencl | 2 | ||||
-rw-r--r-- | src/routines/common.cpp | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl
index 5ba8cf29..c67851df 100644
--- a/src/kernels/level3/level3.opencl
+++ b/src/kernels/level3/level3.opencl
@@ -76,7 +76,7 @@ R"(
 // =================================================================================================
 #if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM)
 
-__kernel __attribute__((reqd_work_group_size(8, 8, 1)))
+__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
 void FillMatrix(const int m, const int n, const int ld, const int offset,
                 __global real* restrict dest, const real_arg arg_value) {
   const real value = GetRealArg(arg_value);
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index f50e75cf..a4d1f577 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -89,8 +89,8 @@ void FillMatrix(Queue &queue, const Device &device,
   kernel.SetArgument(3, static_cast<int>(offset));
   kernel.SetArgument(4, dest());
   kernel.SetArgument(5, GetRealArg(constant_value));
-  auto local = std::vector<size_t>{8, 8};
-  auto global = std::vector<size_t>{Ceil(m, 8), Ceil(n, 8)};
+  auto local = std::vector<size_t>{16, 1};
+  auto global = std::vector<size_t>{Ceil(m, 16), n};
   RunKernel(kernel, queue, device, global, local, event, waitForEvents);
 }
 