summary refs log tree commit diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-10-30 20:39:21 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-10-30 20:39:21 +0100
commit 5c90577dfd365acde14ed6a7e99aa17cf4c6f6e4 (patch)
tree 58d51cb250419daabcd62ee09204914890202007
parent 334a26eb12a10b597312db2a1b7de1548cba6327 (diff)
Added collecting and printing of scores for the kernel-selection tuner
-rw-r--r-- src/tuning/routines/xgemm.cpp 33
-rw-r--r-- src/utilities/timing.hpp 37
2 files changed, 61 insertions, 9 deletions
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index 9590323a..1ccaa0ca 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -90,15 +90,36 @@ void TuneXgemm(int argc, char* argv[]) {
ForceSelectIndirectFrom<T>(to * to * to + 1, device);
const auto direct = TimeRoutine(from, to, step, num_runs, queue, buffers, RunGemmRoutine<T>);
- // Results
- printf("[----------] Collecting results\n");
+ // Determining final score and best kernel selection point
assert(indirect.size() == direct.size());
+ printf("[----------] Collecting results\n");
+ auto ratios = std::vector<double>(indirect.size());
+ for (auto i = size_t{0}; i < indirect.size(); ++i) {
+ ratios[i] = indirect[i].second / direct[i].second;
+ }
+ auto scores = std::vector<TuningResult>(ratios.size());
+ for (auto i = size_t{0}; i < scores.size(); ++i) {
+ auto score = 0;
+ for (auto j = size_t{0}; j < i; ++j) { score += (ratios[j] <= 1.0); }
+ for (auto j = i + 1; j < ratios.size(); ++j) { score += (ratios[j] > 1.0); }
+ const auto epsilon = (scores.size() - i) / 1e3; // favour later results over earlier ones
+ scores[i] = TuningResult{
+ "gemm_kernel_selection",
+ static_cast<double>(score) / static_cast<double>(scores.size() - 1) + epsilon,
+ TuningParameters{TuningParameter{"XGEMM_MIN_INDIRECT_SIZE", indirect[i].first}}
+ };
+ }
+
+ // Displaying results
for (auto i = size_t{0}; i < indirect.size(); ++i) {
assert(indirect[i].first == direct[i].first);
const auto value = indirect[i].first;
- const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
- const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
- printf("[ -------> ] %7zu %8.2lf %8.2lf\n", value, gflops_indirect, gflops_direct);
+ if (indirect[i].second != -1 && direct[i].second != -1) {
+ const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
+ const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
+ printf("[ -------> ] %7zu %8.2lf %8.2lf %8.2lf\n",
+ value, gflops_indirect, gflops_direct, scores[i].score);
+ }
}
// Outputs the results as JSON to disk, including some meta-data
@@ -108,7 +129,7 @@ void TuneXgemm(int argc, char* argv[]) {
{"precision", precision_string},
};
PrintTimingsToFileAsJSON("clblast_routine_gemm_" + precision_string + ".json",
- device, platform, metadata);
+ device, platform, metadata, scores);
}
diff --git a/src/utilities/timing.hpp b/src/utilities/timing.hpp
index 4622aa99..423e6e2b 100644
--- a/src/utilities/timing.hpp
+++ b/src/utilities/timing.hpp
@@ -65,13 +65,18 @@ std::vector<Timing> TimeRoutine(const size_t from, const size_t to, const size_t
// =================================================================================================
+using TuningParameter = std::pair<std::string, size_t>;
+using TuningParameters = std::vector<TuningParameter>;
+struct TuningResult { std::string name; double score; TuningParameters parameters; };
+
void PrintTimingsToFileAsJSON(const std::string &filename,
const Device& device, const Platform& platform,
- const std::vector<std::pair<std::string,std::string>> &descriptions) {
+ const std::vector<std::pair<std::string,std::string>> &metadata,
+ const std::vector<TuningResult>& tuning_results) {
auto file = fopen(filename.c_str(), "w");
fprintf(file, "{\n");
- for (auto &description: descriptions) {
- fprintf(file, " \"%s\": \"%s\",\n", description.first.c_str(), description.second.c_str());
+ for (auto &datum: metadata) {
+ fprintf(file, " \"%s\": \"%s\",\n", datum.first.c_str(), datum.second.c_str());
}
fprintf(file, " \"platform_version\": \"%s\",\n", platform.Version().c_str());
fprintf(file, " \"device_name\": \"%s\",\n", GetDeviceName(device).c_str());
@@ -80,6 +85,32 @@ void PrintTimingsToFileAsJSON(const std::string &filename,
fprintf(file, " \"device_architecture\": \"%s\",\n", GetDeviceArchitecture(device).c_str());
fprintf(file, " \"device_core_clock\": \"%zu\",\n", device.CoreClock());
fprintf(file, " \"device_compute_units\": \"%zu\",\n", device.ComputeUnits());
+ fprintf(file, " \"results\": [\n");
+
+ // Loops over all results
+ auto num_results = tuning_results.size();
+ for (auto r = size_t{0}; r < num_results; ++r) {
+ auto result = tuning_results[r];
+ fprintf(file, " {\n");
+ fprintf(file, " \"kernel\": \"%s\",\n", result.name.c_str());
+ fprintf(file, " \"time\": %.3lf,\n", result.score);
+
+ // Loops over all the parameters for this result
+ fprintf(file, " \"parameters\": {");
+ auto num_configs = result.parameters.size();
+ for (auto p=size_t{0}; p<num_configs; ++p) {
+ auto config = result.parameters[p];
+ fprintf(file, "\"%s\": %zu", config.first.c_str(), config.second);
+ if (p < num_configs-1) { fprintf(file, ","); }
+ }
+ fprintf(file, "}\n");
+
+ // The footer
+ fprintf(file, " }");
+ if (r < num_results - 1) { fprintf(file, ","); }
+ fprintf(file, "\n");
+ }
+ fprintf(file, " ]\n");
fprintf(file, "}\n");
fclose(file);
}