1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file provides helper functions to measure the execution time of functions, kernels, and
// routines.
//
// =================================================================================================
#ifndef CLBLAST_TIMING_H_
#define CLBLAST_TIMING_H_
#include <cstdio>
#include <utility>
#include <vector>
#include <algorithm>
#include <chrono>
#include "utilities/utilities.hpp"
namespace clblast {
// =================================================================================================
// Returns the fastest observed execution time of 'function' in milliseconds. The function is first
// invoked once without being timed (warm-up) and is then invoked and timed individually with
// std::chrono::steady_clock. At least one timed run is always performed: a 'num_runs' of zero is
// treated as one, because taking the minimum over an empty set of timings would dereference the
// end iterator (undefined behaviour). Anything thrown by 'function' propagates to the caller.
template <typename F>
double TimeFunction(const size_t num_runs, F const &function) {
  function(); // warm-up

  // Never empty, such that std::min_element below always points to a valid element
  auto timings = std::vector<double>(std::max<size_t>(num_runs, 1));
  for (auto &timing: timings) {
    const auto start_time = std::chrono::steady_clock::now();
    function();
    const auto elapsed_time = std::chrono::steady_clock::now() - start_time;
    timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
  }

  // The minimum is reported rather than the average: it is the run least disturbed by noise
  return *std::min_element(timings.begin(), timings.end());
}
// =================================================================================================
// Declaration only; the definition is not part of this header. Takes a number of runs, a kernel, a
// queue, a device, and the global and local sizes ('global' by value, 'local' by const-reference)
// and returns a double.
// NOTE(review): presumably runs the kernel 'num_runs' times and returns a time in milliseconds, in
// line with TimeFunction in this file -- confirm against the implementation.
double RunKernelTimed(const size_t num_runs, Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local);
// Declaration only; the definition is not part of this header. Has the same parameter list and
// return type as RunKernelTimed.
// NOTE(review): how this differs from RunKernelTimed (e.g. in error handling) cannot be told from
// this header -- confirm against the implementation before relying on either.
double TimeKernel(const size_t num_runs, Kernel &kernel, Queue &queue, const Device &device,
std::vector<size_t> global, const std::vector<size_t> &local);
// =================================================================================================
// A single measurement: the tested value paired with its best time in milliseconds, or with -1.0
// in case the routine threw an exception for that value.
using Timing = std::pair<size_t, double>;

// Times 'routine(value, queue, buffers)' for every value in the half-open range ['from', 'to'),
// advancing by 'step'. Each value is timed through TimeFunction with 'num_runs' runs and progress
// is printed to stdout. Any exception thrown while timing a value is caught and recorded as a
// time of -1.0 for that value, after which the sweep continues with the next value. The results
// are returned in the order the values were visited.
//
// The sweep always terminates: a 'step' of zero evaluates 'from' only (rather than looping
// forever), and the sweep stops instead of wrapping around when 'value + step' would overflow.
template <typename T, typename F>
std::vector<Timing> TimeRoutine(const size_t from, const size_t to, const size_t step,
                                const size_t num_runs, const Queue& queue,
                                const std::vector<Buffer<T>>& buffers, F const &routine) {
  auto timings = std::vector<Timing>();
  for (auto value = from; value < to; ) {
    printf("[ RUN ] Running with value %zu\n", value);
    try {
      const auto FunctionToTune = [&]() { routine(value, queue, buffers); };
      const auto time_ms = TimeFunction(num_runs, FunctionToTune);
      printf("[ OK ] Took %.2lf ms\n", time_ms);
      timings.push_back({value, time_ms});
    }
    catch (...) {
      // Deliberately best-effort: a failing value must not abort the whole sweep
      printf("[ ERROR ] Exception caught\n");
      timings.push_back({value, -1.0}); // invalid
    }

    // Advances to the next value. Here 'value < to' holds, so 'to - value' cannot underflow. The
    // loop stops when the next value would reach or pass 'to' (this also covers unsigned
    // wrap-around of 'value + step') or when a zero step would never make progress.
    const auto remaining = to - value;
    if (step == 0 || step >= remaining) { break; }
    value += step;
  }
  return timings;
}
// =================================================================================================
} // namespace clblast
// CLBLAST_TIMING_H_
#endif
|