// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file provides helper functions for time measurement and such. // // ================================================================================================= #ifndef CLBLAST_TIMING_H_ #define CLBLAST_TIMING_H_ #include #include #include #include #include #include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= template double TimeFunction(const size_t num_runs, F const &function) { function(); // warm-up auto timings = std::vector(num_runs); for (auto &timing: timings) { const auto start_time = std::chrono::steady_clock::now(); function(); const auto elapsed_time = std::chrono::steady_clock::now() - start_time; timing = std::chrono::duration(elapsed_time).count(); } return *std::min_element(timings.begin(), timings.end()); } // ================================================================================================= using Timing = std::pair; template std::vector TimeRoutine(const size_t from, const size_t to, const size_t step, const size_t num_runs, const Queue& queue, const std::vector>& buffers, F const &routine) { auto timings = std::vector(); for (auto value = from; value < to; value += step) { printf("[ RUN ] Running with value %zu\n", value); try { const auto FunctionToTune = [&]() { routine(value, queue, buffers); }; const auto time_ms = TimeFunction(num_runs, FunctionToTune); printf("[ OK ] Took %.2lf ms\n", time_ms); timings.push_back({value, time_ms}); } catch (...) { printf("[ ERROR ] Exception caught\n"); timings.push_back({value, -1.0}); // invalid } } return timings; } // ================================================================================================= using TuningParameter = std::pair; using TuningParameters = std::vector; struct TuningResult { std::string name; double score; TuningParameters parameters; }; void PrintTimingsToFileAsJSON(const std::string &filename, const Device& device, const Platform& platform, const std::vector> &metadata, const std::vector& tuning_results) { printf("[ STATUS ] Writing results to '%s'\n", filename.c_str()); auto file = fopen(filename.c_str(), "w"); fprintf(file, "{\n"); for (auto &datum: metadata) { fprintf(file, " \"%s\": \"%s\",\n", datum.first.c_str(), datum.second.c_str()); } fprintf(file, " \"platform_version\": \"%s\",\n", platform.Version().c_str()); fprintf(file, " \"clblast_device_name\": \"%s\",\n", GetDeviceName(device).c_str()); fprintf(file, " \"clblast_device_vendor\": \"%s\",\n", platform.Vendor().c_str()); fprintf(file, " \"clblast_device_type\": \"%s\",\n", device.Type().c_str()); fprintf(file, " \"clblast_device_architecture\": \"%s\",\n", GetDeviceArchitecture(device).c_str()); fprintf(file, " \"device_core_clock\": \"%zu\",\n", device.CoreClock()); fprintf(file, " \"device_compute_units\": \"%zu\",\n", device.ComputeUnits()); fprintf(file, " \"results\": [\n"); // Loops over all results auto num_results = tuning_results.size(); for (auto r = size_t{0}; r < num_results; ++r) { auto result = tuning_results[r]; fprintf(file, " {\n"); fprintf(file, " \"kernel\": \"%s\",\n", result.name.c_str()); fprintf(file, " \"time\": %.3lf,\n", result.score); // Loops over all the parameters for this result fprintf(file, " \"parameters\": {"); auto num_configs = result.parameters.size(); for (auto p=size_t{0}; p