diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-30 20:39:21 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-30 20:39:21 +0100 |
commit | 5c90577dfd365acde14ed6a7e99aa17cf4c6f6e4 (patch) | |
tree | 58d51cb250419daabcd62ee09204914890202007 | |
parent | 334a26eb12a10b597312db2a1b7de1548cba6327 (diff) |
Added collecting and printing of scores for the kernel-selection tuner
-rw-r--r-- | src/tuning/routines/xgemm.cpp | 33 | ||||
-rw-r--r-- | src/utilities/timing.hpp | 37 |
2 files changed, 61 insertions, 9 deletions
```diff
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index 9590323a..1ccaa0ca 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -90,15 +90,36 @@ void TuneXgemm(int argc, char* argv[]) {
   ForceSelectIndirectFrom<T>(to * to * to + 1, device);
   const auto direct = TimeRoutine(from, to, step, num_runs, queue, buffers, RunGemmRoutine<T>);
 
-  // Results
-  printf("[----------] Collecting results\n");
+  // Determining final score and best kernel selection point
   assert(indirect.size() == direct.size());
+  printf("[----------] Collecting results\n");
+  auto ratios = std::vector<double>(indirect.size());
+  for (auto i = size_t{0}; i < indirect.size(); ++i) {
+    ratios[i] = indirect[i].second / direct[i].second;
+  }
+  auto scores = std::vector<TuningResult>(ratios.size());
+  for (auto i = size_t{0}; i < scores.size(); ++i) {
+    auto score = 0;
+    for (auto j = size_t{0}; j < i; ++j) { score += (ratios[j] <= 1.0); }
+    for (auto j = i + 1; j < ratios.size(); ++j) { score += (ratios[j] > 1.0); }
+    const auto epsilon = (scores.size() - i) / 1e3; // favour later results over earlier ones
+    scores[i] = TuningResult{
+      "gemm_kernel_selection",
+      static_cast<double>(score) / static_cast<double>(scores.size() - 1) + epsilon,
+      TuningParameters{TuningParameter{"XGEMM_MIN_INDIRECT_SIZE", indirect[i].first}}
+    };
+  }
+
+  // Displaying results
   for (auto i = size_t{0}; i < indirect.size(); ++i) {
     assert(indirect[i].first == direct[i].first);
     const auto value = indirect[i].first;
-    const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
-    const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
-    printf("[ -------> ] %7zu %8.2lf %8.2lf\n", value, gflops_indirect, gflops_direct);
+    if (indirect[i].second != -1 && direct[i].second != -1) {
+      const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
+      const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
+      printf("[ -------> ] %7zu %8.2lf %8.2lf %8.2lf\n",
+             value, gflops_indirect, gflops_direct, scores[i].score);
+    }
   }
 
   // Outputs the results as JSON to disk, including some meta-data
@@ -108,7 +129,7 @@ void TuneXgemm(int argc, char* argv[]) {
     {"precision", precision_string},
   };
   PrintTimingsToFileAsJSON("clblast_routine_gemm_" + precision_string + ".json",
-                           device, platform, metadata);
+                           device, platform, metadata, scores);
 }
 
 
diff --git a/src/utilities/timing.hpp b/src/utilities/timing.hpp
index 4622aa99..423e6e2b 100644
--- a/src/utilities/timing.hpp
+++ b/src/utilities/timing.hpp
@@ -65,13 +65,18 @@ std::vector<Timing> TimeRoutine(const size_t from, const size_t to, const size_t
 
 // =================================================================================================
 
+using TuningParameter = std::pair<std::string, size_t>;
+using TuningParameters = std::vector<TuningParameter>;
+struct TuningResult { std::string name; double score; TuningParameters parameters; };
+
 void PrintTimingsToFileAsJSON(const std::string &filename,
                               const Device& device, const Platform& platform,
-                              const std::vector<std::pair<std::string,std::string>> &descriptions) {
+                              const std::vector<std::pair<std::string,std::string>> &metadata,
+                              const std::vector<TuningResult>& tuning_results) {
   auto file = fopen(filename.c_str(), "w");
   fprintf(file, "{\n");
-  for (auto &description: descriptions) {
-    fprintf(file, " \"%s\": \"%s\",\n", description.first.c_str(), description.second.c_str());
+  for (auto &datum: metadata) {
+    fprintf(file, " \"%s\": \"%s\",\n", datum.first.c_str(), datum.second.c_str());
   }
   fprintf(file, " \"platform_version\": \"%s\",\n", platform.Version().c_str());
   fprintf(file, " \"device_name\": \"%s\",\n", GetDeviceName(device).c_str());
@@ -80,6 +85,32 @@ void PrintTimingsToFileAsJSON(const std::string &filename,
   fprintf(file, " \"device_architecture\": \"%s\",\n", GetDeviceArchitecture(device).c_str());
   fprintf(file, " \"device_core_clock\": \"%zu\",\n", device.CoreClock());
   fprintf(file, " \"device_compute_units\": \"%zu\",\n", device.ComputeUnits());
+  fprintf(file, " \"results\": [\n");
+
+  // Loops over all results
+  auto num_results = tuning_results.size();
+  for (auto r = size_t{0}; r < num_results; ++r) {
+    auto result = tuning_results[r];
+    fprintf(file, " {\n");
+    fprintf(file, " \"kernel\": \"%s\",\n", result.name.c_str());
+    fprintf(file, " \"time\": %.3lf,\n", result.score);
+
+    // Loops over all the parameters for this result
+    fprintf(file, " \"parameters\": {");
+    auto num_configs = result.parameters.size();
+    for (auto p=size_t{0}; p<num_configs; ++p) {
+      auto config = result.parameters[p];
+      fprintf(file, "\"%s\": %zu", config.first.c_str(), config.second);
+      if (p < num_configs-1) { fprintf(file, ","); }
+    }
+    fprintf(file, "}\n");
+
+    // The footer
+    fprintf(file, " }");
+    if (r < num_results - 1) { fprintf(file, ","); }
+    fprintf(file, "\n");
+  }
+  fprintf(file, " ]\n");
   fprintf(file, "}\n");
   fclose(file);
 }
```