From c85c385aaf0edbbd03d8624bfc4a82f65a470675 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Wed, 23 May 2018 22:36:38 +0200
Subject: Added an option in the clients to output timing statistics: minimum, mean, and standard-deviation

---
 test/performance/client.cpp | 86 ++++++++++++++++++++++++++++++---------------
 test/performance/client.hpp |  7 ++--
 2 files changed, 62 insertions(+), 31 deletions(-)

(limited to 'test')

diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index 9480d11a..b45bb08e 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include

 #include "utilities/utilities.hpp"
 #include "test/performance/client.hpp"
@@ -144,6 +145,7 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
   args.print_help = CheckArgument(command_line_args, help, kArgHelp);
   args.silent = CheckArgument(command_line_args, help, kArgQuiet);
   args.no_abbrv = CheckArgument(command_line_args, help, kArgNoAbbreviations);
+  args.full_statistics= CheckArgument(command_line_args, help, kArgFullStatistics);
   warm_up_ = CheckArgument(command_line_args, help, kArgWarmUp);

   // Parse the optional JSON file name arguments
@@ -252,32 +254,32 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
   auto buffers = Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar};

   // Runs the routines and collects the timings
-  auto timings = std::vector<std::pair<std::string, double>>();
-  auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast");
-  timings.push_back(std::pair<std::string, double>("CLBlast", ms_clblast));
+  auto timings = std::vector<std::pair<std::string, TimeResult>>();
+  auto time_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast");
+  timings.push_back(std::pair<std::string, TimeResult>("CLBlast", time_clblast));
   if (args.compare_clblas) {
-    auto ms_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference1_, "clBLAS");
-    timings.push_back(std::pair<std::string, double>("clBLAS", ms_clblas));
+    auto time_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference1_, "clBLAS");
+    timings.push_back(std::pair<std::string, TimeResult>("clBLAS", time_clblas));
   }
   if (args.compare_cblas) {
     auto buffers_host = BuffersHost<T>();
     DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
-    auto ms_cblas = TimedExecution(args.num_runs, args, buffers_host, queue, run_reference2_, "CPU BLAS");
+    auto time_cblas = TimedExecution(args.num_runs, args, buffers_host, queue, run_reference2_, "CPU BLAS");
     HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
-    timings.push_back(std::pair<std::string, double>("CPU BLAS", ms_cblas));
+    timings.push_back(std::pair<std::string, TimeResult>("CPU BLAS", time_cblas));
   }
   if (args.compare_cublas) {
     auto buffers_host = BuffersHost<T>();
     auto buffers_cuda = BuffersCUDA<T>();
     DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
     HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_);
-    auto ms_cublas = 0.0;
+    TimeResult time_cublas;
     try {
-      ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
+      time_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
     } catch (std::runtime_error e) { }
     CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
     HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
-    timings.push_back(std::pair<std::string, double>("cuBLAS", ms_cublas));
+    timings.push_back(std::pair<std::string, TimeResult>("cuBLAS", time_cublas));
   }

   // Prints the performance of the tested libraries
@@ -310,9 +312,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
 // value found in the vector of timing results. The return value is in milliseconds.
 template <typename T, typename U>
 template <typename BufferType, typename RoutineType>
-double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args,
-                                   BufferType &buffers, Queue &queue,
-                                   RoutineType run_blas, const std::string &library_name) {
+typename Client<T,U>::TimeResult Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args,
+                                                             BufferType &buffers, Queue &queue,
+                                                             RoutineType run_blas, const std::string &library_name) {
   auto status = StatusCode::kSuccess;

   // Do an optional warm-up to omit compilation times and initialisations from the measurements
@@ -342,7 +344,18 @@ double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &ar
     auto elapsed_time = std::chrono::steady_clock::now() - start_time;
     timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
   }
-  return *std::min_element(timings.begin(), timings.end());
+
+  // Compute statistics
+  auto result = TimeResult();
+  const auto sum = std::accumulate(timings.begin(), timings.end(), 0.0);
+  const auto mean = sum / timings.size();
+  std::vector<double> diff(timings.size());
+  std::transform(timings.begin(), timings.end(), diff.begin(), [mean](double x) { return x - mean; });
+  const auto sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+  result.mean = mean;
+  result.standard_deviation = std::sqrt(sq_sum / timings.size());
+  result.minimum = *std::min_element(timings.begin(), timings.end());
+  return result;
 }

 // =================================================================================================
@@ -363,17 +376,25 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {

   // Second line
   for (auto &option: options_) { fprintf(stdout, "%9s;", option.c_str()); }
-  fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
-  if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
-  if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
-  if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
+  if (args.full_statistics) {
+    fprintf(stdout, "%9s;%9s;%9s", "min_ms_1", "mean_ms_1", "stddev_1");
+    if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_2", "mean_ms_2", "stddev_2"); }
+    if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_3", "mean_ms_3", "stddev_3"); }
+    if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_4", "mean_ms_4", "stddev_4"); }
+  }
+  else {
+    fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
+    if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
+    if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
+    if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
+  }
   fprintf(stdout, "\n");
 }

 // Print a performance-result row
 template <typename T, typename U>
 void Client<T,U>::PrintTableRow(const Arguments<U>& args,
-                                const std::vector<std::pair<std::string, double>>& timings) {
+                                const std::vector<std::pair<std::string, TimeResult>>& timings) {

   // Creates a vector of relevant variables
   auto integers = std::vector<size_t>{};
@@ -441,16 +462,25 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,

   // Loops over all tested libraries
   for (const auto& timing : timings) {
+    const auto library_name = timing.first;
+    const auto minimum_ms = timing.second.minimum;
+    if (library_name != "CLBlast") { fprintf(stdout, ";"); }
+
+    // Either output full statistics
+    if (args.full_statistics) {
+      const auto mean_ms = timing.second.mean;
+      const auto standard_deviation = timing.second.standard_deviation;
fprintf(stdout, "%9.3lf;%9.3lf;%9.3lf", minimum_ms, mean_ms, standard_deviation); + } - // Computes the GFLOPS and GB/s metrics - auto flops = get_flops_(args); - auto bytes = get_bytes_(args); - auto gflops = (timing.second != 0.0) ? (flops*1e-6)/timing.second : 0; - auto gbs = (timing.second != 0.0) ? (bytes*1e-6)/timing.second : 0; - - // Outputs the performance numbers - if (timing.first != "CLBlast") { fprintf(stdout, ";"); } - fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf", timing.second, gflops, gbs); + // ... or outputs minimum time and the GFLOPS and GB/s metrics + else { + const auto flops = get_flops_(args); + const auto bytes = get_bytes_(args); + const auto gflops = (minimum_ms != 0.0) ? (flops*1e-6)/minimum_ms : 0; + const auto gbs = (minimum_ms != 0.0) ? (bytes*1e-6)/minimum_ms : 0; + fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf", minimum_ms, gflops, gbs); + } } fprintf(stdout, "\n"); } diff --git a/test/performance/client.hpp b/test/performance/client.hpp index eb224976..87471f3a 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -42,6 +42,7 @@ template class Client { public: static const int kSeed; + struct TimeResult { double minimum; double mean; double standard_deviation; }; // Shorthand for the routine-specific functions passed to the tester using Routine = std::function&, Buffers&, Queue&)>; @@ -72,15 +73,15 @@ class Client { // Runs a function a given number of times and returns the execution time of the shortest instance template - double TimedExecution(const size_t num_runs, const Arguments &args, BufferType &buffers, - Queue &queue, RoutineType run_blas, const std::string &library_name); + TimeResult TimedExecution(const size_t num_runs, const Arguments &args, BufferType &buffers, + Queue &queue, RoutineType run_blas, const std::string &library_name); // Prints the header of a performance-data table void PrintTableHeader(const Arguments& args); // Prints a row of performance data, including results of two libraries void PrintTableRow(const Arguments& args, - const std::vector>& timings); + const std::vector>& timings); // The routine-specific functions passed to the tester const Routine run_routine_; -- cgit v1.2.3