From 7a756cbce7e3e025ec9fbadd717a32d4711262ad Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 15 Mar 2018 20:58:42 +0100 Subject: Fixed a failing TRSV test using a CPU with Apple OpenCL --- src/database/apple_cpu_fallback.hpp | 2 +- src/kernels/level2/xtrsv.opencl | 2 +- src/routines/common.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp index e1aa4661..8d257b5e 100644 --- a/src/database/apple_cpu_fallback.hpp +++ b/src/database/apple_cpu_fallback.hpp @@ -41,7 +41,7 @@ const DatabaseEntry XgerApple = { "Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry XtrsvApple = { - "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } + "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry XgemmApple = { "Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } } diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl index ebea77a3..8777eb77 100644 --- a/src/kernels/level2/xtrsv.opencl +++ b/src/kernels/level2/xtrsv.opencl @@ -18,7 +18,7 @@ R"( // ================================================================================================= #if defined(ROUTINE_TRSV) -__kernel __attribute__((reqd_work_group_size(64, 1, 1))) +__kernel __attribute__((reqd_work_group_size(16, 1, 1))) void FillVector(const int n, const int inc, const int offset, __global real* restrict dest, const real_arg arg_value) { const real value = GetRealArg(arg_value); diff --git a/src/routines/common.cpp b/src/routines/common.cpp index 5b178e53..f50e75cf 100644 --- a/src/routines/common.cpp +++ b/src/routines/common.cpp @@ -125,8 +125,8 @@ void FillVector(Queue &queue, const Device &device, kernel.SetArgument(2, static_cast(offset)); kernel.SetArgument(3, dest()); kernel.SetArgument(4, GetRealArg(constant_value)); - auto local = std::vector{64}; - auto global = std::vector{Ceil(n, 64)}; + auto local = std::vector{16}; + auto global = std::vector{Ceil(n, 16)}; RunKernel(kernel, queue, device, global, local, event, waitForEvents); } -- cgit v1.2.3