From a3e67f2be2ea9f964c8077d379ca522c6c439036 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 6 Oct 2016 19:51:12 +0200 Subject: Added a kernel selection database to select between the direct and indirect GEMM kernels --- src/database/database.cpp | 4 +- src/database/database.hpp | 1 + src/database/kernel_selection.hpp | 79 +++++++++++++++++++++++++++++++++++++++ src/routines/level3/xgemm.cpp | 11 +++--- 4 files changed, 89 insertions(+), 6 deletions(-) create mode 100644 src/database/kernel_selection.hpp (limited to 'src') diff --git a/src/database/database.cpp b/src/database/database.cpp index 2696fb9b..df9ac373 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -26,6 +26,7 @@ #include "database/kernels/pad.hpp" #include "database/kernels/transpose.hpp" #include "database/kernels/padtranspose.hpp" +#include "database/kernel_selection.hpp" namespace clblast { // ================================================================================================= @@ -43,7 +44,8 @@ const std::vector Database::database = { CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, - PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble + PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble, + KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble }; // ================================================================================================= diff --git a/src/database/database.hpp b/src/database/database.hpp index 7c0afb46..912f0f15 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -80,6 +80,7 @@ class Database { static const DatabaseEntry PadHalf, 
PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; static const DatabaseEntry PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble; + static const DatabaseEntry KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble; static const std::vector database; // The constructor with a user-provided database overlay (potentially an empty vector) diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp new file mode 100644 index 00000000..bccfb0c0 --- /dev/null +++ b/src/database/kernel_selection.hpp @@ -0,0 +1,79 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This determines when to switch between the direct (for small sizes) and indirect GEMM kernel +// with pre/post-processing kernels (for larger sizes). These can be set in a similar way as for the +// regular kernel tuning parameters: they can be specific for a certain vendor or device or can use +// some common default values. 
+// +// ================================================================================================= + +namespace clblast { +// ================================================================================================= + +const Database::DatabaseEntry Database::KernelSelectionHalf = { + "KernelSelection", Precision::kHalf, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::KernelSelectionSingle = { + "KernelSelection", Precision::kSingle, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::KernelSelectionComplexSingle = { + "KernelSelection", Precision::kComplexSingle, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::KernelSelectionDouble = { + "KernelSelection", Precision::kDouble, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } }, + } + }, + } +}; + +// ================================================================================================= + +const Database::DatabaseEntry Database::KernelSelectionComplexDouble = { + "KernelSelection", Precision::kComplexDouble, { + { // Default + kDeviceTypeAll, "default", { + { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } }, + } + }, + } +}; + +// ================================================================================================= +} // namespace clblast diff --git 
a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp index 93f5d30c..9d912374 100644 --- a/src/routines/level3/xgemm.cpp +++ b/src/routines/level3/xgemm.cpp @@ -22,7 +22,8 @@ namespace clblast { // Constructor: forwards to base class constructor template Xgemm::Xgemm(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm", "XgemmDirect"}, + Routine(queue, event, name, + {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"}, PrecisionValue()) { source_string_ = #include "../../kernels/level3/level3.opencl" @@ -102,15 +103,15 @@ StatusCode Xgemm::DoGemm(const Layout layout, status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } - // Optionally runs the direct version of GEMM. TODO: Set this based on the arguments - const auto do_gemm_direct = true; // for now, for testing - if (do_gemm_direct) { + // Selects which version of GEMM to run + const auto do_gemm_direct = (m * n * k < db_["XGEMM_MIN_INDIRECT_SIZE"]); + if (do_gemm_direct) { // for small sizes (single kernel) return GemmDirect(m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, a_do_transpose, b_do_transpose, c_do_transpose, a_conjugate, b_conjugate); } - else { + else { // for larger sizes (pre/post-processing plus a very fast kernel) return GemmIndirect(m, n, k, alpha, a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, c_buffer, c_offset, c_ld, -- cgit v1.2.3