summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-03-15 20:58:42 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-03-15 20:58:42 +0100
commit 7a756cbce7e3e025ec9fbadd717a32d4711262ad (patch)
tree 292cef2b032079d31e5f6cf5fd804bb7ee4110e6
parent f4d96e80c385404a59a17588386d4115fd0bef6b (diff)
Fixed a failing TRSV test using a CPU with Apple OpenCL
-rw-r--r-- src/database/apple_cpu_fallback.hpp | 2
-rw-r--r-- src/kernels/level2/xtrsv.opencl | 2
-rw-r--r-- src/routines/common.cpp | 4
3 files changed, 4 insertions, 4 deletions
diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp
index e1aa4661..8d257b5e 100644
--- a/src/database/apple_cpu_fallback.hpp
+++ b/src/database/apple_cpu_fallback.hpp
@@ -41,7 +41,7 @@ const DatabaseEntry XgerApple = {
"Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XtrsvApple = {
- "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
+ "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XgemmApple = {
"Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } }
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl
index ebea77a3..8777eb77 100644
--- a/src/kernels/level2/xtrsv.opencl
+++ b/src/kernels/level2/xtrsv.opencl
@@ -18,7 +18,7 @@ R"(
// =================================================================================================
#if defined(ROUTINE_TRSV)
-__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
+__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
void FillVector(const int n, const int inc, const int offset,
__global real* restrict dest, const real_arg arg_value) {
const real value = GetRealArg(arg_value);
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index 5b178e53..f50e75cf 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -125,8 +125,8 @@ void FillVector(Queue &queue, const Device &device,
kernel.SetArgument(2, static_cast<int>(offset));
kernel.SetArgument(3, dest());
kernel.SetArgument(4, GetRealArg(constant_value));
- auto local = std::vector<size_t>{64};
- auto global = std::vector<size_t>{Ceil(n, 64)};
+ auto local = std::vector<size_t>{16};
+ auto global = std::vector<size_t>{Ceil(n, 16)};
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}