// File: test/performance/client.hpp
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
//   Cedric Nugteren <www.cedricnugteren.nl>
//
// This class implements the performance-test client. It is generic for all CLBlast routines by
// taking a number of routine-specific functions as arguments, such as how to compute buffer sizes
// or how to get the FLOPS count.
// Typename T: the data-type of the routine's memory buffers (==precision)
// Typename U: the data-type of the alpha and beta arguments
//
// This file also provides the common interface to the performance client (see the 'RunClient'
// function for details).
//
// =================================================================================================

#ifndef CLBLAST_TEST_PERFORMANCE_CLIENT_H_
#define CLBLAST_TEST_PERFORMANCE_CLIENT_H_

#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#include <vector>

// The libraries to test
#ifdef CLBLAST_REF_CLBLAS
  #include <clBLAS.h>
#endif
#include "clblast.h"

#include "utilities.hpp"

namespace clblast {
// =================================================================================================

// See comment at top of file for a description of the class.
//
// Template parameters (as described in the file header):
//   T: the data-type of the routine's memory buffers (==precision)
//   U: the data-type of the alpha and beta arguments
//
// Only the declarations live here; the member-function definitions are not part of this header.
template <typename T, typename U>
class Client {
 public:

  // Shorthand for the routine-specific functions passed to the tester:
  //   Routine:   callable taking the arguments, the buffers, and a queue; returns a StatusCode
  //   SetMetric: callable that receives the arguments by mutable reference and returns nothing
  //   GetMetric: callable that computes a size_t value from the (read-only) arguments
  using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
  using SetMetric = std::function<void(Arguments<U>&)>;
  using GetMetric = std::function<size_t(const Arguments<U>&)>;

  // The constructor. Takes the routine under test, two reference routines to compare against, a
  // list of option strings, and the callables producing the FLOPS and bytes counts. 'RunClient'
  // passes either the clBLAS/CBLAS reference of the routine or the 'ReferenceNotAvailable'
  // placeholder as 'run_reference1' and 'run_reference2'.
  // NOTE(review): presumably each parameter is stored in the equally-named member below; confirm
  // against the constructor definition.
  Client(const Routine run_routine, const Routine run_reference1, const Routine run_reference2,
         const std::vector<std::string> &options,
         const GetMetric get_flops, const GetMetric get_bytes);

  // Parses all command-line arguments, filling in the arguments structure. If no command-line
  // argument is given for a particular argument, it is filled in with a default value.
  // 'RunClient' passes 'C::BLASLevel()' as 'level' and 'C::DefaultLDA/LDB/LDC' as the three
  // 'default_*_ld' callables.
  // NOTE(review): presumably 'level' is the BLAS level and the 'default_*_ld' callables supply the
  // default leading dimensions of the A, B, and C matrices; confirm against the definition.
  Arguments<U> ParseArguments(int argc, char *argv[], const size_t level,
                              const GetMetric default_a_ld,
                              const GetMetric default_b_ld,
                              const GetMetric default_c_ld);

  // The main client function, setting-up arguments, matrices, OpenCL buffers, etc. After set-up, it
  // calls the client routines. The arguments are taken by non-const reference and 'set_sizes' is a
  // callable which may modify them ('RunClient' passes 'C::SetSizes').
  void PerformanceTest(Arguments<U> &args, const SetMetric set_sizes);

 private:

  // Runs a function a given number of times and returns the execution time of the shortest instance
  // NOTE(review): 'library_name' is presumably only used for reporting; confirm in the definition.
  double TimedExecution(const size_t num_runs, const Arguments<U> &args, Buffers<T> &buffers,
                        Queue &queue, Routine run_blas, const std::string &library_name);

  // Prints the header of a performance-data table
  void PrintTableHeader(const Arguments<U>& args);

  // Prints a row of performance data, including results of two libraries. Each entry of 'timings'
  // pairs a string with a double.
  // NOTE(review): presumably the pair holds a library name and its measured time; confirm.
  void PrintTableRow(const Arguments<U>& args,
                     const std::vector<std::pair<std::string, double>>& timings);

  // The routine-specific functions passed to the tester. All members are const, which means the
  // implicit copy-assignment operator of this class is deleted.
  const Routine run_routine_;
  const Routine run_reference1_;
  const Routine run_reference2_;
  const std::vector<std::string> options_;
  const GetMetric get_flops_;
  const GetMetric get_bytes_;
};

// =================================================================================================

// Placeholder with the same parameter list as a reference routine, used when a comparison library
// is not available. All three arguments are ignored; the result is always
// 'StatusCode::kNotImplemented'.
template <typename T, typename U>
static StatusCode ReferenceNotAvailable(const Arguments<U> &, Buffers<T> &, Queue &) {
  const auto unavailable = StatusCode::kNotImplemented;
  return unavailable;
}

// The interface to the performance client. This is a separate function in the header such that it
// is automatically compiled for each routine, templated by the parameter "C".
template <typename C, typename T, typename U>
void RunClient(int argc, char *argv[]) {

  // Sets the reference to test against
  #ifdef CLBLAST_REF_CLBLAS
    auto reference1 = C::RunReference1; // clBLAS when available
  #else
    auto reference1 = ReferenceNotAvailable<T,U>;
  #endif
  #ifdef CLBLAST_REF_CBLAS
    auto reference2 = C::RunReference2; // CBLAS when available
  #else
    auto reference2 = ReferenceNotAvailable<T,U>;
  #endif

  // Creates a new client
  auto client = Client<T,U>(C::RunRoutine, reference1, reference2, C::GetOptions(),
                            C::GetFlops, C::GetBytes);

  // Simple command line argument parser with defaults
  auto args = client.ParseArguments(argc, argv, C::BLASLevel(),
                                    C::DefaultLDA, C::DefaultLDB, C::DefaultLDC);
  if (args.print_help) { return; }

  // Runs the client
  client.PerformanceTest(args, C::SetSizes);
}

// =================================================================================================
} // namespace clblast

// CLBLAST_TEST_PERFORMANCE_CLIENT_H_
#endif