summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/database/database.cpp5
-rw-r--r--src/database/kernel_selection.hpp136
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine.hpp14
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_16.hpp26
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_32.hpp34
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_3232.hpp34
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_64.hpp26
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_6464.hpp26
-rw-r--r--src/routine.cpp2
-rw-r--r--src/routines/level3/xgemm.cpp6
-rw-r--r--src/routines/levelx/xgemmbatched.cpp2
-rw-r--r--src/tuning/routines/xgemm.cpp13
-rw-r--r--src/utilities/timing.hpp9
13 files changed, 184 insertions, 149 deletions
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 836c8803..2fa86151 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -30,10 +30,11 @@
#include "database/kernels/transpose/transpose.hpp"
#include "database/kernels/padtranspose/padtranspose.hpp"
+#include "database/kernels/gemm_routine/gemm_routine.hpp"
+
#include "database/kernels/xtrsv.hpp"
#include "database/kernels/invert.hpp"
#include "database/apple_cpu_fallback.hpp"
-#include "database/kernel_selection.hpp"
namespace clblast {
// =================================================================================================
@@ -54,7 +55,7 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data
database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble,
database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble,
- database::KernelSelectionHalf, database::KernelSelectionSingle, database::KernelSelectionDouble, database::KernelSelectionComplexSingle, database::KernelSelectionComplexDouble
+ database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble
};
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
database::XaxpyApple, database::XdotApple,
diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp
deleted file mode 100644
index 6d74b9f9..00000000
--- a/src/database/kernel_selection.hpp
+++ /dev/null
@@ -1,136 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This determines when to switch between the direct (for small sizes) and in-direct GEMM kernel
-// with pre/post-processing kernels (for larger sizes). These can be set in a similar way as for the
-// regular kernel tuning parameters: they can be specific for a certain vendor or device or can use
-// some common default values.
-//
-// =================================================================================================
-
-namespace clblast {
-namespace database {
-// =================================================================================================
-
-const DatabaseEntry KernelSelectionHalf = {
- "KernelSelection", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "default", { { kDeviceNameDefault, Params{ 1*1*1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "default", { { kDeviceNameDefault, Params{ 1280*1280*1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { { kDeviceNameDefault, Params{ 512*512*512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const DatabaseEntry KernelSelectionSingle = {
- "KernelSelection", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "default", { { kDeviceNameDefault, Params{ 1*1*1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "default", { { kDeviceNameDefault, Params{ 1280*1280*1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- {
- kDeviceTypeGPU, "ARM", {
- { "default", { { kDeviceNameDefault, Params{ 128*128*128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { { kDeviceNameDefault, Params{ 512*512*512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const DatabaseEntry KernelSelectionComplexSingle = {
- "KernelSelection", Precision::kComplexSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "default", { { kDeviceNameDefault, Params{ 1*1*1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "default", { { kDeviceNameDefault, Params{ 1280*1280*1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { { kDeviceNameDefault, Params{ 512*512*512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const DatabaseEntry KernelSelectionDouble = {
- "KernelSelection", Precision::kDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "default", { { kDeviceNameDefault, Params{ 1*1*1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "default", { { kDeviceNameDefault, Params{ 1280*1280*1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { { kDeviceNameDefault, Params{ 512*512*512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- }
-};
-
-// =================================================================================================
-
-const DatabaseEntry KernelSelectionComplexDouble = {
- "KernelSelection", Precision::kComplexDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
- { // Intel GPUs
- kDeviceTypeGPU, "Intel", {
- { "default", { { kDeviceNameDefault, Params{ 1*1*1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // NVIDIA GPUs
- kDeviceTypeGPU, "NVIDIA", {
- { "default", { { kDeviceNameDefault, Params{ 1280*1280*1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- { // Default
- kDeviceTypeAll, "default", {
- { "default", { { kDeviceNameDefault, Params{ 512*512*512, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
- }
- },
- }
-};
-
-// =================================================================================================
-} // namespace database
-} // namespace clblast
diff --git a/src/database/kernels/gemm_routine/gemm_routine.hpp b/src/database/kernels/gemm_routine/gemm_routine.hpp
new file mode 100644
index 00000000..f1470252
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine.hpp
@@ -0,0 +1,14 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine' kernels.
+//
+// =================================================================================================
+
+#include "database/kernels/gemm_routine/gemm_routine_16.hpp"
+#include "database/kernels/gemm_routine/gemm_routine_32.hpp"
+#include "database/kernels/gemm_routine/gemm_routine_3232.hpp"
+#include "database/kernels/gemm_routine/gemm_routine_64.hpp"
+#include "database/kernels/gemm_routine/gemm_routine_6464.hpp"
diff --git a/src/database/kernels/gemm_routine/gemm_routine_16.hpp b/src/database/kernels/gemm_routine/gemm_routine_16.hpp
new file mode 100644
index 00000000..e17afe4b
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine_16.hpp
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine16' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry GemmRoutineHalf = {
+ "GemmRoutine", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", {
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp
new file mode 100644
index 00000000..624de564
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp
@@ -0,0 +1,34 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine32' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry GemmRoutineSingle = {
+ "GemmRoutine", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "default", {
+ { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", {
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/gemm_routine/gemm_routine_3232.hpp b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp
new file mode 100644
index 00000000..689ae8d8
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine_3232.hpp
@@ -0,0 +1,34 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine3232' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry GemmRoutineComplexSingle = {
+ "GemmRoutine", Precision::kComplexSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // Intel GPUs
+ kDeviceTypeGPU, "Intel", {
+ { "default", {
+ { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", {
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/gemm_routine/gemm_routine_64.hpp b/src/database/kernels/gemm_routine/gemm_routine_64.hpp
new file mode 100644
index 00000000..7fd29128
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine_64.hpp
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine64' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry GemmRoutineDouble = {
+ "GemmRoutine", Precision::kDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", {
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/database/kernels/gemm_routine/gemm_routine_6464.hpp b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp
new file mode 100644
index 00000000..85d2c8f1
--- /dev/null
+++ b/src/database/kernels/gemm_routine/gemm_routine_6464.hpp
@@ -0,0 +1,26 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
+// is auto-generated by the 'scripts/database/database.py' Python script.
+//
+// This file populates the database with best-found tuning parameters for the 'Gemm_Routine6464' kernels.
+//
+// =================================================================================================
+
+namespace clblast {
+namespace database {
+
+const DatabaseEntry GemmRoutineComplexDouble = {
+ "GemmRoutine", Precision::kComplexDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // Default
+ kDeviceTypeAll, "default", {
+ { "default", {
+ { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
+ }
+};
+
+} // namespace database
+} // namespace clblast
diff --git a/src/routine.cpp b/src/routine.cpp
index 0f9fe360..48273eac 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -43,7 +43,7 @@ const std::unordered_map<std::string, const std::vector<std::string>> Routine::r
{"Padtranspose", routines_gemm_syrk},
{"Xgemm", routines_gemm_syrk},
{"XgemmDirect", routines_gemm},
- {"KernelSelection", routines_gemm},
+ {"GemmRoutine", routines_gemm},
{"Invert", routines_trsm},
};
// =================================================================================================
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index a0063ee2..94392dd0 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -23,7 +23,7 @@ namespace clblast {
template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
Routine(queue, event, name,
- {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
+ {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","GemmRoutine"},
PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
@@ -104,7 +104,9 @@ void Xgemm<T>::DoGemm(const Layout layout,
// Selects which version of GEMM to run
const auto m_n_k = static_cast<unsigned long long>(m) * static_cast<unsigned long long>(n) *
static_cast<unsigned long long>(k);
- const auto do_gemm_direct = (m_n_k < static_cast<unsigned long long>(db_["XGEMM_MIN_INDIRECT_SIZE"]));
+ const auto database_value = static_cast<unsigned long long>(db_["XGEMM_MIN_INDIRECT_SIZE"]);
+ const auto min_indirect_size = database_value * database_value * database_value;
+ const auto do_gemm_direct = (m_n_k < min_indirect_size);
if (do_gemm_direct) { // for small sizes (single kernel)
GemmDirect(m, n, k, alpha,
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
diff --git a/src/routines/levelx/xgemmbatched.cpp b/src/routines/levelx/xgemmbatched.cpp
index 8a015e97..152e7194 100644
--- a/src/routines/levelx/xgemmbatched.cpp
+++ b/src/routines/levelx/xgemmbatched.cpp
@@ -23,7 +23,7 @@ namespace clblast {
template <typename T>
XgemmBatched<T>::XgemmBatched(Queue &queue, EventPointer event, const std::string &name):
Routine(queue, event, name,
- {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
+ {"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","GemmRoutine"},
PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index 1ccaa0ca..f45e8635 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -42,7 +42,7 @@ void RunGemmRoutine(const size_t value, const Queue& queue, const std::vector<Bu
template <typename T>
void ForceSelectIndirectFrom(const size_t minimum_size, const Device &device) {
- const auto override_status = OverrideParameters(device(), "KernelSelection", PrecisionValue<T>(),
+ const auto override_status = OverrideParameters(device(), "GemmRoutine", PrecisionValue<T>(),
{{"XGEMM_MIN_INDIRECT_SIZE", minimum_size}});
if (override_status != StatusCode::kSuccess) {
throw RuntimeError("OverrideParameters failed with status " + ToString(override_status));
@@ -61,7 +61,7 @@ void TuneXgemm(int argc, char* argv[]) {
// Values for m, n, and k
const auto from = size_t{64};
- const auto to = size_t{1024};
+ const auto to = size_t{2048};
const auto step = size_t{64};
// OpenCL initialisation
@@ -106,7 +106,10 @@ void TuneXgemm(int argc, char* argv[]) {
scores[i] = TuningResult{
"gemm_kernel_selection",
static_cast<double>(score) / static_cast<double>(scores.size() - 1) + epsilon,
- TuningParameters{TuningParameter{"XGEMM_MIN_INDIRECT_SIZE", indirect[i].first}}
+ TuningParameters{
+ TuningParameter{"XGEMM_MIN_INDIRECT_SIZE", indirect[i].first},
+ TuningParameter{"PRECISION", static_cast<size_t>(precision)}
+ }
};
}
@@ -126,11 +129,15 @@ void TuneXgemm(int argc, char* argv[]) {
const auto precision_string = std::to_string(static_cast<size_t>(precision));
auto metadata = std::vector<std::pair<std::string,std::string>>{
{"kernel_family", "gemm_routine"},
+ {"arg_from", ToString(from)},
+ {"arg_to", ToString(to)},
+ {"arg_step", ToString(step)},
{"precision", precision_string},
};
PrintTimingsToFileAsJSON("clblast_routine_gemm_" + precision_string + ".json",
device, platform, metadata, scores);
+ printf("[ STATUS ] All done\n");
}
// =================================================================================================
diff --git a/src/utilities/timing.hpp b/src/utilities/timing.hpp
index 423e6e2b..bfad6147 100644
--- a/src/utilities/timing.hpp
+++ b/src/utilities/timing.hpp
@@ -73,16 +73,17 @@ void PrintTimingsToFileAsJSON(const std::string &filename,
const Device& device, const Platform& platform,
const std::vector<std::pair<std::string,std::string>> &metadata,
const std::vector<TuningResult>& tuning_results) {
+ printf("[ STATUS ] Writing results to '%s'\n", filename.c_str());
auto file = fopen(filename.c_str(), "w");
fprintf(file, "{\n");
for (auto &datum: metadata) {
fprintf(file, " \"%s\": \"%s\",\n", datum.first.c_str(), datum.second.c_str());
}
fprintf(file, " \"platform_version\": \"%s\",\n", platform.Version().c_str());
- fprintf(file, " \"device_name\": \"%s\",\n", GetDeviceName(device).c_str());
- fprintf(file, " \"device_vendor\": \"%s\",\n", platform.Vendor().c_str());
- fprintf(file, " \"device_type\": \"%s\",\n", device.Type().c_str());
- fprintf(file, " \"device_architecture\": \"%s\",\n", GetDeviceArchitecture(device).c_str());
+ fprintf(file, " \"clblast_device_name\": \"%s\",\n", GetDeviceName(device).c_str());
+ fprintf(file, " \"clblast_device_vendor\": \"%s\",\n", platform.Vendor().c_str());
+ fprintf(file, " \"clblast_device_type\": \"%s\",\n", device.Type().c_str());
+ fprintf(file, " \"clblast_device_architecture\": \"%s\",\n", GetDeviceArchitecture(device).c_str());
fprintf(file, " \"device_core_clock\": \"%zu\",\n", device.CoreClock());
fprintf(file, " \"device_compute_units\": \"%zu\",\n", device.ComputeUnits());
fprintf(file, " \"results\": [\n");