summary | refs | log | tree | commit | diff
path: root/test/performance/client.cpp
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-23 22:36:38 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-05-23 22:36:38 +0200
commit: c85c385aaf0edbbd03d8624bfc4a82f65a470675 (patch)
tree: c0cf2427e61c8205b600674c84ae3124b9721376 /test/performance/client.cpp
parent: 8e28a7699d73f24cb05c87a2b67c3690d64e3e11 (diff)
Added an option in the clients to output timing statistics: minimum, mean, and standard-deviation
Diffstat (limited to 'test/performance/client.cpp')
-rw-r--r-- test/performance/client.cpp 86
1 files changed, 58 insertions, 28 deletions
diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index 9480d11a..b45bb08e 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -17,6 +17,7 @@
#include <algorithm>
#include <chrono>
#include <random>
+#include <tuning/tuning.hpp>
#include "utilities/utilities.hpp"
#include "test/performance/client.hpp"
@@ -144,6 +145,7 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
args.print_help = CheckArgument(command_line_args, help, kArgHelp);
args.silent = CheckArgument(command_line_args, help, kArgQuiet);
args.no_abbrv = CheckArgument(command_line_args, help, kArgNoAbbreviations);
+ args.full_statistics= CheckArgument(command_line_args, help, kArgFullStatistics);
warm_up_ = CheckArgument(command_line_args, help, kArgWarmUp);
// Parse the optional JSON file name arguments
@@ -252,32 +254,32 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
auto buffers = Buffers<T>{x_vec, y_vec, a_mat, b_mat, c_mat, ap_mat, scalar};
// Runs the routines and collects the timings
- auto timings = std::vector<std::pair<std::string, double>>();
- auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast");
- timings.push_back(std::pair<std::string, double>("CLBlast", ms_clblast));
+ auto timings = std::vector<std::pair<std::string, TimeResult>>();
+ auto time_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast");
+ timings.push_back(std::pair<std::string, TimeResult>("CLBlast", time_clblast));
if (args.compare_clblas) {
- auto ms_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference1_, "clBLAS");
- timings.push_back(std::pair<std::string, double>("clBLAS", ms_clblas));
+ auto time_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference1_, "clBLAS");
+ timings.push_back(std::pair<std::string, TimeResult>("clBLAS", time_clblas));
}
if (args.compare_cblas) {
auto buffers_host = BuffersHost<T>();
DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
- auto ms_cblas = TimedExecution(args.num_runs, args, buffers_host, queue, run_reference2_, "CPU BLAS");
+ auto time_cblas = TimedExecution(args.num_runs, args, buffers_host, queue, run_reference2_, "CPU BLAS");
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
- timings.push_back(std::pair<std::string, double>("CPU BLAS", ms_cblas));
+ timings.push_back(std::pair<std::string, TimeResult>("CPU BLAS", time_cblas));
}
if (args.compare_cublas) {
auto buffers_host = BuffersHost<T>();
auto buffers_cuda = BuffersCUDA<T>();
DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_);
- auto ms_cublas = 0.0;
+ TimeResult time_cublas;
try {
- ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
+ time_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
} catch (std::runtime_error e) { }
CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
- timings.push_back(std::pair<std::string, double>("cuBLAS", ms_cublas));
+ timings.push_back(std::pair<std::string, TimeResult>("cuBLAS", time_cublas));
}
// Prints the performance of the tested libraries
@@ -310,9 +312,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
// value found in the vector of timing results. The return value is in milliseconds.
template <typename T, typename U>
template <typename BufferType, typename RoutineType>
-double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args,
- BufferType &buffers, Queue &queue,
- RoutineType run_blas, const std::string &library_name) {
+typename Client<T,U>::TimeResult Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &args,
+ BufferType &buffers, Queue &queue,
+ RoutineType run_blas, const std::string &library_name) {
auto status = StatusCode::kSuccess;
// Do an optional warm-up to omit compilation times and initialisations from the measurements
@@ -342,7 +344,18 @@ double Client<T,U>::TimedExecution(const size_t num_runs, const Arguments<U> &ar
auto elapsed_time = std::chrono::steady_clock::now() - start_time;
timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
}
- return *std::min_element(timings.begin(), timings.end());
+
+ // Compute statistics
+ auto result = TimeResult();
+ const auto sum = std::accumulate(timings.begin(), timings.end(), 0.0);
+ const auto mean = sum / timings.size();
+ std::vector<double> diff(timings.size());
+ std::transform(timings.begin(), timings.end(), diff.begin(), [mean](double x) { return x - mean; });
+ const auto sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
+ result.mean = mean;
+ result.standard_deviation = std::sqrt(sq_sum / timings.size());
+ result.minimum = *std::min_element(timings.begin(), timings.end());
+ return result;
}
// =================================================================================================
@@ -363,17 +376,25 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {
// Second line
for (auto &option: options_) { fprintf(stdout, "%9s;", option.c_str()); }
- fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
- if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
- if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
- if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
+ if (args.full_statistics) {
+ fprintf(stdout, "%9s;%9s;%9s", "min_ms_1", "mean_ms_1", "stddev_1");
+ if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_2", "mean_ms_2", "stddev_2"); }
+ if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_3", "mean_ms_3", "stddev_3"); }
+ if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "min_ms_4", "mean_ms_4", "stddev_4"); }
+ }
+ else {
+ fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
+ if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
+ if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
+ if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
+ }
fprintf(stdout, "\n");
}
// Print a performance-result row
template <typename T, typename U>
void Client<T,U>::PrintTableRow(const Arguments<U>& args,
- const std::vector<std::pair<std::string, double>>& timings) {
+ const std::vector<std::pair<std::string, TimeResult>>& timings) {
// Creates a vector of relevant variables
auto integers = std::vector<size_t>{};
@@ -441,16 +462,25 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
// Loops over all tested libraries
for (const auto& timing : timings) {
+ const auto library_name = timing.first;
+ const auto minimum_ms = timing.second.minimum;
+ if (library_name != "CLBlast") { fprintf(stdout, ";"); }
+
+ // Either output full statistics
+ if (args.full_statistics) {
+ const auto mean_ms = timing.second.mean;
+ const auto standard_deviation = timing.second.standard_deviation;
+ fprintf(stdout, "%9.3lf;%9.3lf;%9.3lf", minimum_ms, mean_ms, standard_deviation);
+ }
- // Computes the GFLOPS and GB/s metrics
- auto flops = get_flops_(args);
- auto bytes = get_bytes_(args);
- auto gflops = (timing.second != 0.0) ? (flops*1e-6)/timing.second : 0;
- auto gbs = (timing.second != 0.0) ? (bytes*1e-6)/timing.second : 0;
-
- // Outputs the performance numbers
- if (timing.first != "CLBlast") { fprintf(stdout, ";"); }
- fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf", timing.second, gflops, gbs);
+ // ... or outputs minimum time and the GFLOPS and GB/s metrics
+ else {
+ const auto flops = get_flops_(args);
+ const auto bytes = get_bytes_(args);
+ const auto gflops = (minimum_ms != 0.0) ? (flops*1e-6)/minimum_ms : 0;
+ const auto gbs = (minimum_ms != 0.0) ? (bytes*1e-6)/minimum_ms : 0;
+ fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf", minimum_ms, gflops, gbs);
+ }
}
fprintf(stdout, "\n");
}