summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-11-20 20:29:52 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-11-20 20:29:52 +0100
commit e0f3484084ecf5e88ad71bd4542b7a2df6d3b707 (patch)
tree 6e6c6f9d1c5433664794c9cf2b7400e66b1e63f1
parent 5467c0cac55b02bda69057fe0a130379934b8edb (diff)
Fixes some displaying issues in the GEMM routine tuner
-rw-r--r-- ROADMAP.md 2
-rw-r--r-- src/tuning/routines/xgemm.cpp 11
2 files changed, 7 insertions, 6 deletions
diff --git a/ROADMAP.md b/ROADMAP.md
index 4209c239..ad15d16c 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -8,7 +8,7 @@ This file gives an overview of the main features planned for addition to CLBlast
| - | Oct '17 | CNugteren | ✔ | CUDA API for CLBlast |
| [#169](https://github.com/CNugteren/CLBlast/issues/169), [#195](https://github.com/CNugteren/CLBlast/issues/195) | Oct-Nov '17 | CNugteren | ✔ | Auto-tuning the kernel selection parameter |
| [#181](https://github.com/CNugteren/CLBlast/issues/181), [#201](https://github.com/CNugteren/CLBlast/issues/201) | Nov '17 | CNugteren | ✔ | Compilation for Android and testing on a device |
-| - | Nov '17 | CNugteren | | Integration of CLTune for easy testing on Android / fewer dependencies |
+| - | Nov '17 | CNugteren | ✔ | Integration of CLTune for easy testing on Android / fewer dependencies |
| [#128](https://github.com/CNugteren/CLBlast/issues/128), [#205](https://github.com/CNugteren/CLBlast/issues/205) | Nov-Dec '17 | CNugteren | | Pre-processor for loop unrolling and array-to-register-promotion for e.g. ARM Mali |
| [#207](https://github.com/CNugteren/CLBlast/issues/207) | Dec '17 | CNugteren | | Tuning of the TRSM/TRSV routines |
| [#169](https://github.com/CNugteren/CLBlast/issues/169) | '17 | dividiti | | Problem-specific tuning parameter selection |
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index a12ab706..0cb32a2e 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -116,19 +116,20 @@ void TuneXgemm(int argc, char* argv[]) {
}
// Displaying results
- printf("| value | indirect | direct | score | (lowest score == best switching point)\n");
- printf("x---------x-------------x-------------x----------x\n");
+ printf("| || indirect GEMM || direct GEMM || |\n");
+ printf("| m=n=k || ms | GFLOPS || ms | GFLOPS || score | (lowest score == best switching point)\n");
+ printf("x---------xx--------x----------xx--------x----------xx----------x\n");
for (auto i = size_t{0}; i < indirect.size(); ++i) {
assert(indirect[i].first == direct[i].first);
const auto value = indirect[i].first;
if (indirect[i].second != -1 && direct[i].second != -1) {
const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
- printf("| %7zu | %8.2lf ms | %8.2lf ms | %8.3lf |\n",
- value, gflops_indirect, gflops_direct, scores[i].score);
+ printf("| %7zu || %6.2lf | %8.1lf || %6.2lf | %8.1lf || %8.3lf |\n",
+ value, indirect[i].second, gflops_indirect, direct[i].second, gflops_direct, scores[i].score);
}
}
- printf("x---------x-------------x-------------x----------x\n");
+ printf("x---------xx--------x----------xx--------x----------xx----------x\n");
printf("\n");
// Computes the best switching point