summaryrefslogtreecommitdiff
path: root/src/tuning/configurations.cpp
blob: 82d7e3b41cd9d8a340ad6880285f22b6250c0835 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
//   Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the parameter configurations for the CLBlast auto-tuner (taken from CLTune).
// This is only used for the optional tuner binaries and not part of the core of CLBlast.
//
// =================================================================================================

#include <vector>
#include <string>

#include "tuning/configurations.hpp"

namespace clblast {
// =================================================================================================

// Builds the list of all configurations to be tuned. The limits of the device (local memory size,
// work-item sizes per dimension and work-group size) are queried once and forwarded to the
// recursive search, which applies the user-defined constraints while enumerating.
std::vector<Configuration> SetConfigurations(const Device& device,
                                             const std::vector<Parameter> parameters,
                                             const std::vector<size_t>& local_size_base,
                                             const TransformVector& mul_local_config,
                                             const TransformVector& div_local_config,
                                             const Constraints& constraints,
                                             const LocalMemSizeInfo& local_mem_size_info) {

  // Device limits against which every candidate configuration is validated
  const auto device_local_mem = device.LocalMemSize();
  const auto device_item_sizes = device.MaxWorkItemSizes();
  const auto device_group_size = device.MaxWorkGroupSize();

  // Starts the recursion at the first parameter (index 0) with an empty configuration
  const auto initial_config = Configuration();
  std::vector<Configuration> result;
  PopulateConfigurations(parameters, local_size_base, mul_local_config, div_local_config,
                         size_t{0}, initial_config, result,
                         device_local_mem, constraints, local_mem_size_info,
                         device_item_sizes, device_group_size);
  return result;
}

// Iterates recursively over all permutations of the user-defined parameters
void PopulateConfigurations(const std::vector<Parameter> &parameters,
                            const std::vector<size_t> local_size_base,
                            const TransformVector& mul_local_config,
                            const TransformVector& div_local_config,
                            const size_t index, const Configuration &config,
                            std::vector<Configuration> &configuration,
                            const size_t local_mem_max,
                            const Constraints& constraints,
                            const LocalMemSizeInfo& local_mem_size_info,
                            const std::vector<size_t>& max_work_item_sizes,
                            const size_t max_work_group_size) {

  // End of the chain: all parameters are considered, store the resulting configuration if it is a
  // valid one according to the constraints
  if (index == parameters.size()) {
    if (ValidConfiguration(config, local_mem_max, constraints, local_mem_size_info,
                           local_size_base, mul_local_config, div_local_config,
                           max_work_item_sizes, max_work_group_size)) {
      configuration.push_back(config);
    }
    return;
  }

  // This loop iterates over all values of the current parameter and calls this function recursively
  Parameter parameter = parameters[index];
  for (auto &value: parameter.second) {
    auto config_copy = config;
    config_copy[parameter.first] = value;
    PopulateConfigurations(parameters, local_size_base, mul_local_config, div_local_config,
                           index+1, config_copy, configuration,
                           local_mem_max, constraints, local_mem_size_info,
                           max_work_item_sizes, max_work_group_size);
  }
}

// Checks whether or not a configuration is valid. A configuration is rejected when:
//   - any of the user-defined constraints does not hold for it,
//   - it requires more local memory than 'local_mem_max',
//   - a dimension of its local thread size exceeds the corresponding entry of
//     'max_work_item_sizes', or has no corresponding entry at all,
//   - the product of its local thread sizes exceeds 'max_work_group_size'.
// Returns true only if none of the above applies.
bool ValidConfiguration(const Configuration &config,
                        const size_t local_mem_max,
                        const Constraints& constraints,
                        const LocalMemSizeInfo& local_mem_size_info,
                        const std::vector<size_t> local_size_base,
                        const TransformVector& mul_local_config,
                        const TransformVector& div_local_config,
                        const std::vector<size_t>& max_work_item_sizes,
                        const size_t max_work_group_size) {

  // Iterates over all constraints
  for (const auto &constraint: constraints) {

    // Finds the values of the parameters, in the order in which the constraint lists them
    auto values = std::vector<size_t>(constraint.parameters.size());
    for (auto i=size_t{0}; i<constraint.parameters.size(); ++i) {
      values[i] = config.at(constraint.parameters[i]);
    }

    // Checks this constraint for these values
    if (!constraint.valid_if(values)) {
      return false;
    }
  }

  // Finds the values of the local memory parameters
  auto local_mem_values = std::vector<size_t>(local_mem_size_info.parameters.size());
  for (auto i=size_t{0}; i<local_mem_size_info.parameters.size(); ++i) {
    local_mem_values[i] = config.at(local_mem_size_info.parameters[i]);
  }

  // Checks the local memory size
  if (local_mem_size_info.local_mem_size(local_mem_values) > local_mem_max) {
    return false;
  }

  // Checks the local thread size (both per dimension and in total)
  const auto local = SetThreadConfiguration(config, local_size_base,
                                            mul_local_config, div_local_config);
  for (auto i=size_t{0}; i<local.size(); ++i) {

    // A dimension for which no limit is given cannot be validated: the configuration is rejected
    // instead of indexing 'max_work_item_sizes' beyond its end
    if (i >= max_work_item_sizes.size() || local[i] > max_work_item_sizes[i]) {
      return false;
    }
  }
  auto local_size = size_t{1};
  for (const auto &item: local) { local_size *= item; }
  if (local_size > max_work_group_size) {
    return false;
  }

  // Everything was OK: this configuration is valid
  return true;
}

// Multiplies and/or divides a thread configuration (local/global). Starting from a copy of 'base',
// the i-th entry is multiplied by the value of the i-th parameter named in each row of
// 'mul_config', and afterwards divided by the value of the i-th parameter named in each row of
// 'div_config'. The parameter values are looked up by name in 'config'.
std::vector<size_t> SetThreadConfiguration(const Configuration& config,
                                           const std::vector<size_t> base,
                                           const TransformVector& mul_config,
                                           const TransformVector& div_config) {
  auto sizes = base;

  // All multiplications are applied first
  for (const auto &names: mul_config) {
    auto dim = size_t{0};
    for (const auto &name: names) {
      sizes[dim] *= config.at(name);
      ++dim;
    }
  }

  // The divisions follow once every multiplier has been applied
  for (const auto &names: div_config) {
    auto dim = size_t{0};
    for (const auto &name: names) {
      sizes[dim] /= config.at(name);
      ++dim;
    }
  }
  return sizes;
}

// =================================================================================================
} // namespace clblast