summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 4
-rw-r--r-- src/tuning/tuning.cpp 11
2 files changed, 14 insertions, 1 deletion
diff --git a/CHANGELOG b/CHANGELOG
index 4ad70a95..c4a758f1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,8 @@
+Development (next version)
+- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah')
+- Various minor fixes and enhancements
+
Version 1.4.1
- Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded
- Fixed an issue with double cl_program release in the CLBlast caching system
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index 216f4b31..822f8851 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -342,8 +342,17 @@ void Tuner(int argc, char* argv[], const int V,
const auto best_time_ms = best_configuration->score;
if (best_time_ms == 0.0) { return; }
- // Also prints the performance of the best-case in terms of GB/s or GFLOPS
+ // Computes and prints some other statistics
+ auto average_ms = 0.0;
+ for (const auto result : results) { average_ms += result.score; }
+ average_ms /= results.size();
printf("\n");
+ printf("* Got average result of %.2lf ms", average_ms);
+ printf(": %.1lf %s\n", settings.metric_amount / (average_ms * 1.0e6),
+ settings.performance_unit.c_str());
+
+
+ // Also prints the performance of the best-case in terms of GB/s or GFLOPS
printf("* Found best result %.2lf ms", best_time_ms);
printf(": %.1lf %s\n", settings.metric_amount / (best_time_ms * 1.0e6),
settings.performance_unit.c_str());