summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-07-23 21:00:10 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-07-25 22:28:44 +0200
commit 6a8b9e24f2428c140dac97d8279cbb99d051c59d (patch)
tree 7405472387c4852431abd8151faddc341ca64457
parent f8fb707fa440d1ce8b319bec8efe3c20d21dcd37 (diff)
Added code to report the average tuning results
-rw-r--r-- CHANGELOG 4
-rw-r--r-- src/tuning/tuning.cpp 11
2 files changed, 14 insertions, 1 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 4ad70a95..c4a758f1 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,8 @@
+Development (next version)
+- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah')
+- Various minor fixes and enhancements
+
Version 1.4.1
- Fixed an access violation under Windows upon releasing the OpenCL program when the driver is already unloaded
- Fixed an issue with double cl_program release in the CLBlast caching system
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index 216f4b31..822f8851 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -342,8 +342,17 @@ void Tuner(int argc, char* argv[], const int V,
const auto best_time_ms = best_configuration->score;
if (best_time_ms == 0.0) { return; }
- // Also prints the performance of the best-case in terms of GB/s or GFLOPS
+ // Computes and prints some other statistics
+ auto average_ms = 0.0;
+ for (const auto result : results) { average_ms += result.score; }
+ average_ms /= results.size();
printf("\n");
+ printf("* Got average result of %.2lf ms", average_ms);
+ printf(": %.1lf %s\n", settings.metric_amount / (average_ms * 1.0e6),
+ settings.performance_unit.c_str());
+
+
+ // Also prints the performance of the best-case in terms of GB/s or GFLOPS
printf("* Found best result %.2lf ms", best_time_ms);
printf(": %.1lf %s\n", settings.metric_amount / (best_time_ms * 1.0e6),
settings.performance_unit.c_str());