summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-15 21:09:52 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-15 21:09:52 +0100
commit: 52791bf3553bb47a50dea4ac234f7e1b09c4383c (patch)
tree: eca393f57e6ac7358bba255ee454e0652c2a4506
parent: 7a756cbce7e3e025ec9fbadd717a32d4711262ad (diff)
Fixed a failing TRSM test using a CPU with Apple OpenCL
-rw-r--r-- src/kernels/level3/level3.opencl | 2
-rw-r--r-- src/routines/common.cpp | 4
2 files changed, 3 insertions, 3 deletions
diff --git a/src/kernels/level3/level3.opencl b/src/kernels/level3/level3.opencl
index 5ba8cf29..c67851df 100644
--- a/src/kernels/level3/level3.opencl
+++ b/src/kernels/level3/level3.opencl
@@ -76,7 +76,7 @@ R"(
// =================================================================================================
#if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM)
-__kernel __attribute__((reqd_work_group_size(8, 8, 1)))
+__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
void FillMatrix(const int m, const int n, const int ld, const int offset,
__global real* restrict dest, const real_arg arg_value) {
const real value = GetRealArg(arg_value);
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index f50e75cf..a4d1f577 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -89,8 +89,8 @@ void FillMatrix(Queue &queue, const Device &device,
kernel.SetArgument(3, static_cast<int>(offset));
kernel.SetArgument(4, dest());
kernel.SetArgument(5, GetRealArg(constant_value));
- auto local = std::vector<size_t>{8, 8};
- auto global = std::vector<size_t>{Ceil(m, 8), Ceil(n, 8)};
+ auto local = std::vector<size_t>{16, 1};
+ auto global = std::vector<size_t>{Ceil(m, 16), n};
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}