// Source path: root/test/performance/client.hpp (blob 47a130174258461dfebae65f3a4394917a277857)
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
//   Cedric Nugteren <www.cedricnugteren.nl>
//
// This class implements the performance-test client. It is generic for all CLBlast routines by
// taking a number of routine-specific functions as arguments, such as how to compute buffer sizes
// or how to get the FLOPS count.
// Typename T: the data-type of the routine's memory buffers (==precision)
// Typename U: the data-type of the alpha and beta arguments
//
// This file also provides the common interface to the performance client (see the 'RunClient'
// function for details).
//
// =================================================================================================

#ifndef CLBLAST_TEST_PERFORMANCE_CLIENT_H_
#define CLBLAST_TEST_PERFORMANCE_CLIENT_H_

#include <string>
#include <vector>
#include <utility>

#include "utilities/utilities.hpp"

// The libraries to test
#ifdef CLBLAST_REF_CLBLAS
  #include <clBLAS.h>
#endif
#include "test/wrapper_cuda.hpp"
#include "clblast.h"

namespace clblast {
// =================================================================================================

// See comment at top of file for a description of the class. This header only declares the class;
// the member functions and the static 'kSeed' are defined outside of the visible source.
template <typename T, typename U>
class Client {
 public:
  // Seed constant. Only declared here; its value and use are not visible in this header
  // (presumably a seed for random test data -- TODO confirm against the implementation).
  static const int kSeed;

  // Shorthand for the routine-specific functions passed to the tester. 'Routine' and 'Reference1'
  // share one signature (device-side 'Buffers<T>'), 'Reference2' operates on 'BuffersHost<T>' and
  // 'Reference3' on 'BuffersCUDA<T>'. 'RunClient' binds these to clBLAS, CBLAS and cuBLAS
  // respectively. 'SetMetric' may modify the arguments it receives, whereas 'GetMetric' only reads
  // them and yields a 'size_t'.
  using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
  using Reference1 = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
  using Reference2 = std::function<StatusCode(const Arguments<U>&, BuffersHost<T>&, Queue&)>;
  using Reference3 = std::function<StatusCode(const Arguments<U>&, BuffersCUDA<T>&, Queue&)>;
  using SetMetric = std::function<void(Arguments<U>&)>;
  using GetMetric = std::function<size_t(const Arguments<U>&)>;

  // The constructor. Takes the routine under test, the three reference implementations, the list
  // of option names, the names of the input and output buffers, and the functions that compute the
  // FLOPS and bytes metrics for a given set of arguments.
  Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2,
         const Reference3 run_reference3, const std::vector<std::string> &options,
         const std::vector<std::string> &buffers_in, const std::vector<std::string> &buffers_out,
         const GetMetric get_flops, const GetMetric get_bytes);

  // Parses all command-line arguments, filling in the arguments structure. If no command-line
  // argument is given for a particular argument, it is filled in with a default value. The three
  // 'default_*_ld' functions are supplied by the caller (presumably to derive the default leading
  // dimensions of the A, B and C matrices -- confirm against the implementation). The returned
  // structure carries a 'print_help' flag that 'RunClient' checks before running the test.
  Arguments<U> ParseArguments(int argc, char *argv[], const size_t level,
                              const GetMetric default_a_ld,
                              const GetMetric default_b_ld,
                              const GetMetric default_c_ld);

  // The main client function, setting-up arguments, matrices, OpenCL buffers, etc. After set-up, it
  // calls the client routines. Note that 'args' is taken by non-const reference and 'set_sizes' is
  // allowed to modify it.
  void PerformanceTest(Arguments<U> &args, const SetMetric set_sizes);

 private:

  // Runs a function a given number of times and returns the execution time of the shortest instance.
  // Templated on the buffer type and the callable type so that the same helper can serve the
  // routine under test as well as references that use different buffer types.
  template <typename BufferType, typename RoutineType>
  double TimedExecution(const size_t num_runs, const Arguments<U> &args, BufferType &buffers,
                        Queue &queue, RoutineType run_blas, const std::string &library_name);

  // Prints the header of a performance-data table
  void PrintTableHeader(const Arguments<U>& args);

  // Prints a row of performance data. The timings are passed as a list of (string, double) pairs,
  // presumably (library name, execution time).
  // NOTE(review): the original comment spoke of "results of two libraries", but the signature
  // accepts any number of entries -- confirm which is intended.
  void PrintTableRow(const Arguments<U>& args,
                     const std::vector<std::pair<std::string, double>>& timings);

  // The routine-specific functions passed to the tester. These mirror the constructor parameters
  // one-to-one and are all immutable after construction.
  const Routine run_routine_;
  const Reference1 run_reference1_;
  const Reference2 run_reference2_;
  const Reference3 run_reference3_;
  const std::vector<std::string> options_;
  const std::vector<std::string> buffers_in_;
  const std::vector<std::string> buffers_out_;
  const GetMetric get_flops_;
  const GetMetric get_bytes_;

  // Extra arguments
  // NOTE(review): this flag has no in-class initializer; confirm that it is set before first use.
  bool warm_up_; // if enabled, do a warm-up run first before measuring execution time
};

// =================================================================================================

// Placeholder reference implementation, used in place of a real comparison library that was not
// enabled at compile time. All arguments are ignored; the result is always 'kNotImplemented'.
template <typename T, typename U, typename BufferType>
static StatusCode ReferenceNotAvailable(const Arguments<U>& /*args*/,
                                        BufferType& /*buffers*/,
                                        Queue& /*queue*/) {
  const auto status = StatusCode::kNotImplemented;
  return status;
}

// The interface to the performance client. This is a separate function in the header such that it
// is automatically compiled for each routine, templated by the parameter "C".
template <typename C, typename T, typename U>
void RunClient(int argc, char *argv[]) {

  // Sets the reference to test against
  #ifdef CLBLAST_REF_CLBLAS
    auto reference1 = C::RunReference1; // clBLAS when available
  #else
    auto reference1 = ReferenceNotAvailable<T,U,Buffers<T>>;
  #endif
  #ifdef CLBLAST_REF_CBLAS
    auto reference2 = C::RunReference2; // CBLAS when available
  #else
    auto reference2 = ReferenceNotAvailable<T,U,BuffersHost<T>>;
  #endif
  #ifdef CLBLAST_REF_CUBLAS
    auto reference3 = C::RunReference3; // cuBLAS when available
  #else
    auto reference3 = ReferenceNotAvailable<T,U,BuffersCUDA<T>>;
  #endif

  // Creates a new client
  auto client = Client<T,U>(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(),
                            C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes);

  // Simple command line argument parser with defaults
  auto args = client.ParseArguments(argc, argv, C::BLASLevel(),
                                    C::DefaultLDA, C::DefaultLDB, C::DefaultLDC);
  if (args.print_help) { return; }

  // Runs the client
  client.PerformanceTest(args, C::SetSizes);
}

// =================================================================================================
} // namespace clblast

// CLBLAST_TEST_PERFORMANCE_CLIENT_H_
#endif