summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/database/database.cpp4
-rw-r--r--src/database/database.hpp1
-rw-r--r--src/database/kernel_selection.hpp79
-rw-r--r--src/routines/level3/xgemm.cpp11
4 files changed, 89 insertions, 6 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 2696fb9b..df9ac373 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -26,6 +26,7 @@
#include "database/kernels/pad.hpp"
#include "database/kernels/transpose.hpp"
#include "database/kernels/padtranspose.hpp"
+#include "database/kernel_selection.hpp"
namespace clblast {
// =================================================================================================
@@ -43,7 +44,8 @@ const std::vector<Database::DatabaseEntry> Database::database = {
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
- PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble
+ PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble,
+ KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble
};
// =================================================================================================
diff --git a/src/database/database.hpp b/src/database/database.hpp
index 7c0afb46..912f0f15 100644
--- a/src/database/database.hpp
+++ b/src/database/database.hpp
@@ -80,6 +80,7 @@ class Database {
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
static const DatabaseEntry PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble;
+ static const DatabaseEntry KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble;
static const std::vector<DatabaseEntry> database;
// The constructor with a user-provided database overlay (potentially an empty vector)
diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp
new file mode 100644
index 00000000..bccfb0c0
--- /dev/null
+++ b/src/database/kernel_selection.hpp
@@ -0,0 +1,79 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This determines when to switch between the direct (for small sizes) and in-direct GEMM kernel
+// with pre/post-processing kernels (for larger sizes). These can be set in a similar way as for the
+// regular kernel tuning parameters: they can be specific for a certain vendor or device or can use
+// some common default values.
+//
+// =================================================================================================
+
+namespace clblast {
+// =================================================================================================
+
+const Database::DatabaseEntry Database::KernelSelectionHalf = {
+ "KernelSelection", Precision::kHalf, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::KernelSelectionSingle = {
+ "KernelSelection", Precision::kSingle, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::KernelSelectionComplexSingle = {
+ "KernelSelection", Precision::kComplexSingle, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::KernelSelectionDouble = {
+ "KernelSelection", Precision::kDouble, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+
+const Database::DatabaseEntry Database::KernelSelectionComplexDouble = {
+ "KernelSelection", Precision::kComplexDouble, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", { {"XGEMM_MIN_INDIRECT_SIZE",512*512*512} } },
+ }
+ },
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 93f5d30c..9d912374 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -22,7 +22,8 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm", "XgemmDirect"},
+ Routine(queue, event, name,
+ {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
PrecisionValue<T>()) {
source_string_ =
#include "../../kernels/level3/level3.opencl"
@@ -102,15 +103,15 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld);
if (ErrorIn(status)) { return status; }
- // Optionally runs the direct version of GEMM. TODO: Set this based on the arguments
- const auto do_gemm_direct = true; // for now, for testing
- if (do_gemm_direct) {
+ // Selects which version of GEMM to run
+ const auto do_gemm_direct = (m * n * k < db_["XGEMM_MIN_INDIRECT_SIZE"]);
+ if (do_gemm_direct) { // for small sizes (single kernel)
return GemmDirect(m, n, k, alpha,
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
c_buffer, c_offset, c_ld,
a_do_transpose, b_do_transpose, c_do_transpose, a_conjugate, b_conjugate);
}
- else {
+ else { // for larger sizes (pre/post-processing plus a very fast kernel)
return GemmIndirect(m, n, k, alpha,
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
c_buffer, c_offset, c_ld,