summary | refs | log | tree | commit | diff
path: root/src/tuning
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-19 12:48:59 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-19 12:48:59 +0200
commit: 66583b3cdac143bd43da1ee6f0f5af012b8ca39a (patch)
tree: cd2b571da58a9259476f7b028cf6b63ad92a4f3a /src/tuning
parent: 637e49e134df2e6be3e5ce4901bbc8803535e14d (diff)
The GEMM routine tuner now loads kernel JSON tuning results from disk if available; it is now run as part of the alltuners target
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/routines/xgemm.cpp 18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index 0721ad7c..ceb91f4d 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -15,8 +15,10 @@
#include <exception>
#include <string>
#include <vector>
+#include <iostream>
#include "utilities/utilities.hpp"
+#include "../test/test_utilities.hpp"
#include "tuning/routines/routine_tuner.hpp"
namespace clblast {
@@ -101,6 +103,22 @@ void TuneXgemm(int argc, char* argv[]) {
const auto context = Context(device);
auto queue = Queue(context, device);
+ // Pre-load GEMM kernel tuning results if they exist
+ printf("* The GEMM routine tuner requires already tuned kernels\n");
+ printf(" Applying tuning results from disk if they exist...\n\n");
+ const auto kernel_names = {"xgemm_1", "xgemm_direct_1", "copy", "pad", "transpose", "padtranspose"};
+ for (const auto& kernel_name : kernel_names) {
+ const auto tuner_file_name = "clblast_" + std::string{kernel_name} + "_" +
+ ToString(static_cast<int>(precision)) + ".json";
+ printf("* Looking for tuning results in the current folder: '%s'\n", tuner_file_name.c_str());
+ if (std::ifstream(tuner_file_name)) { // Checks if the file exists on disk
+ OverrideParametersFromJSONFiles({tuner_file_name}, device(), precision);
+ }
+ else {
+ printf(" Not found: assuming the kernel '%s' is already tuned\n\n", kernel_name);
+ }
+ }
+
// Run the tuners for the XGEMM routines
TuneKernelSelection<T>(platform, device, context, queue, precision, RunGemmRoutine<T>,
64, 2048, 64, 1, num_runs,