summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/database/apple_cpu_fallback.hpp2
-rw-r--r--src/kernels/level2/xtrsv.opencl2
-rw-r--r--src/routines/common.cpp4
3 files changed, 4 insertions, 4 deletions
diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp
index e1aa4661..8d257b5e 100644
--- a/src/database/apple_cpu_fallback.hpp
+++ b/src/database/apple_cpu_fallback.hpp
@@ -41,7 +41,7 @@ const DatabaseEntry XgerApple = {
"Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XtrsvApple = {
- "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
+ "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XgemmApple = {
"Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } }
diff --git a/src/kernels/level2/xtrsv.opencl b/src/kernels/level2/xtrsv.opencl
index ebea77a3..8777eb77 100644
--- a/src/kernels/level2/xtrsv.opencl
+++ b/src/kernels/level2/xtrsv.opencl
@@ -18,7 +18,7 @@ R"(
// =================================================================================================
#if defined(ROUTINE_TRSV)
-__kernel __attribute__((reqd_work_group_size(64, 1, 1)))
+__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
void FillVector(const int n, const int inc, const int offset,
__global real* restrict dest, const real_arg arg_value) {
const real value = GetRealArg(arg_value);
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index 5b178e53..f50e75cf 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -125,8 +125,8 @@ void FillVector(Queue &queue, const Device &device,
kernel.SetArgument(2, static_cast<int>(offset));
kernel.SetArgument(3, dest());
kernel.SetArgument(4, GetRealArg(constant_value));
- auto local = std::vector<size_t>{64};
- auto global = std::vector<size_t>{Ceil(n, 64)};
+ auto local = std::vector<size_t>{16};
+ auto global = std::vector<size_t>{Ceil(n, 16)};
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}