From 9b0a435fb00b845b875590be90acffcd4f3bb009 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 2 Nov 2017 21:47:14 +0100 Subject: Integrated the GEMM routine tuner for kernel selection; added first tuning results --- src/routines/level3/xgemm.cpp | 6 ++++-- src/routines/levelx/xgemmbatched.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'src/routines') diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index a0063ee2..94392dd0 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -23,7 +23,7 @@ namespace clblast { template Xgemm::Xgemm(Queue &queue, EventPointer event, const std::string &name): Routine(queue, event, name, - {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"}, + {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","GemmRoutine"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" @@ -104,7 +104,9 @@ void Xgemm::DoGemm(const Layout layout, // Selects which version of GEMM to run const auto m_n_k = static_cast(m) * static_cast(n) * static_cast(k); - const auto do_gemm_direct = (m_n_k < static_cast(db_["XGEMM_MIN_INDIRECT_SIZE"])); + const auto database_value = static_cast(db_["XGEMM_MIN_INDIRECT_SIZE"]); + const auto min_indirect_size = database_value * database_value * database_value; + const auto do_gemm_direct = (m_n_k < min_indirect_size); if (do_gemm_direct) { // for small sizes (single kernel) GemmDirect(m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, diff --git a/src/routines/levelx/xgemmbatched.cpp b/src/routines/levelx/xgemmbatched.cpp index 8a015e97..152e7194 100644 --- a/src/routines/levelx/xgemmbatched.cpp +++ b/src/routines/levelx/xgemmbatched.cpp @@ -23,7 +23,7 @@ namespace clblast { template XgemmBatched::XgemmBatched(Queue &queue, EventPointer event, const std::string &name): Routine(queue, event, name, - {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"}, + {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","GemmRoutine"}, PrecisionValue(), {}, { #include "../../kernels/level3/level3.opencl" #include "../../kernels/level3/copy_fast.opencl" -- cgit v1.2.3