diff options
-rw-r--r-- | CHANGELOG | 4 | ||||
-rw-r--r-- | src/tuning/tuning.cpp | 11 |
2 files changed, 14 insertions, 1 deletion
@@ -1,4 +1,8 @@ +Development (next version) +- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah') +- Various minor fixes and enhancements + Version 1.4.1 - Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded - Fixed an issue with double cl_program release in the CLBlast caching system diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp index 216f4b31..822f8851 100644 --- a/src/tuning/tuning.cpp +++ b/src/tuning/tuning.cpp @@ -342,8 +342,17 @@ void Tuner(int argc, char* argv[], const int V, const auto best_time_ms = best_configuration->score; if (best_time_ms == 0.0) { return; } - // Also prints the performance of the best-case in terms of GB/s or GFLOPS + // Computes and prints some other statistics + auto average_ms = 0.0; + for (const auto result : results) { average_ms += result.score; } + average_ms /= results.size(); printf("\n"); + printf("* Got average result of %.2lf ms", average_ms); + printf(": %.1lf %s\n", settings.metric_amount / (average_ms * 1.0e6), + settings.performance_unit.c_str()); + + + // Also prints the performance of the best-case in terms of GB/s or GFLOPS printf("* Found best result %.2lf ms", best_time_ms); printf(": %.1lf %s\n", settings.metric_amount / (best_time_ms * 1.0e6), settings.performance_unit.c_str()); |