diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-11-20 20:29:52 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-11-20 20:29:52 +0100 |
commit | e0f3484084ecf5e88ad71bd4542b7a2df6d3b707 (patch) | |
tree | 6e6c6f9d1c5433664794c9cf2b7400e66b1e63f1 | |
parent | 5467c0cac55b02bda69057fe0a130379934b8edb (diff) |
Fixes some displaying issues in the GEMM routine tuner
-rw-r--r-- | ROADMAP.md | 2 | ||||
-rw-r--r-- | src/tuning/routines/xgemm.cpp | 11 |
2 files changed, 7 insertions, 6 deletions
@@ -8,7 +8,7 @@ This file gives an overview of the main features planned for addition to CLBlast
 | - | Oct '17 | CNugteren | ✔ | CUDA API for CLBlast |
 | [#169](https://github.com/CNugteren/CLBlast/issues/169), [#195](https://github.com/CNugteren/CLBlast/issues/195) | Oct-Nov '17 | CNugteren | ✔ | Auto-tuning the kernel selection parameter |
 | [#181](https://github.com/CNugteren/CLBlast/issues/181), [#201](https://github.com/CNugteren/CLBlast/issues/201) | Nov '17 | CNugteren | ✔ | Compilation for Android and testing on a device |
-| - | Nov '17 | CNugteren | | Integration of CLTune for easy testing on Android / fewer dependencies |
+| - | Nov '17 | CNugteren | ✔ | Integration of CLTune for easy testing on Android / fewer dependencies |
 | [#128](https://github.com/CNugteren/CLBlast/issues/128), [#205](https://github.com/CNugteren/CLBlast/issues/205) | Nov-Dec '17 | CNugteren | | Pre-processor for loop unrolling and array-to-register-promotion for e.g. ARM Mali |
 | [#207](https://github.com/CNugteren/CLBlast/issues/207) | Dec '17 | CNugteren | | Tuning of the TRSM/TRSV routines |
 | [#169](https://github.com/CNugteren/CLBlast/issues/169) | '17 | dividiti | | Problem-specific tuning parameter selection |
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index a12ab706..0cb32a2e 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -116,19 +116,20 @@ void TuneXgemm(int argc, char* argv[]) {
   }
 
   // Displaying results
-  printf("| value | indirect | direct | score | (lowest score == best switching point)\n");
-  printf("x---------x-------------x-------------x----------x\n");
+  printf("| || indirect GEMM || direct GEMM || |\n");
+  printf("| m=n=k || ms | GFLOPS || ms | GFLOPS || score | (lowest score == best switching point)\n");
+  printf("x---------xx--------x----------xx--------x----------xx----------x\n");
   for (auto i = size_t{0}; i < indirect.size(); ++i) {
     assert(indirect[i].first == direct[i].first);
     const auto value = indirect[i].first;
     if (indirect[i].second != -1 && direct[i].second != -1) {
       const auto gflops_indirect = (2 * value * value * value) / (indirect[i].second * 1.0e6);
       const auto gflops_direct = (2 * value * value * value) / (direct[i].second * 1.0e6);
-      printf("| %7zu | %8.2lf ms | %8.2lf ms | %8.3lf |\n",
-             value, gflops_indirect, gflops_direct, scores[i].score);
+      printf("| %7zu || %6.2lf | %8.1lf || %6.2lf | %8.1lf || %8.3lf |\n",
+             value, indirect[i].second, gflops_indirect, direct[i].second, gflops_direct, scores[i].score);
     }
   }
-  printf("x---------x-------------x-------------x----------x\n");
+  printf("x---------xx--------x----------xx--------x----------xx----------x\n");
   printf("\n");
 
   // Computes the best switching point