diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-07-28 16:01:03 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-07-28 16:01:03 +0200 |
commit | fa84ac36f23c1aebb5facf946b21d8c0f1a4a46d (patch) | |
tree | 624d3830c45d40561f2e88946f4fd37e5addca74 /src/tuning/configurations.cpp | |
parent | dda1e567f872d3d89f2f7cd890fb5b29ff98537c (diff) |
The tuners now also check for valid local thread configurations and skip invalid ones completely, saving compilation time
Diffstat (limited to 'src/tuning/configurations.cpp')
-rw-r--r-- | src/tuning/configurations.cpp | 49 |
1 files changed, 42 insertions, 7 deletions
diff --git a/src/tuning/configurations.cpp b/src/tuning/configurations.cpp index 1fe232cf..82d7e3b4 100644 --- a/src/tuning/configurations.cpp +++ b/src/tuning/configurations.cpp @@ -23,28 +23,42 @@ namespace clblast { // Finds all configurations. It also applies the user-defined constraints within. std::vector<Configuration> SetConfigurations(const Device& device, const std::vector<Parameter> parameters, + const std::vector<size_t>& local_size_base, + const TransformVector& mul_local_config, + const TransformVector& div_local_config, const Constraints& constraints, const LocalMemSizeInfo& local_mem_size_info) { const auto local_mem_max = device.LocalMemSize(); + const auto max_work_item_sizes = device.MaxWorkItemSizes(); + const auto max_work_group_size = device.MaxWorkGroupSize(); auto config = Configuration(); auto configurations = std::vector<Configuration>(); - PopulateConfigurations(parameters, 0, config, configurations, - local_mem_max, constraints, local_mem_size_info); + PopulateConfigurations(parameters, local_size_base, mul_local_config, div_local_config, + 0, config, configurations, + local_mem_max, constraints, local_mem_size_info, + max_work_item_sizes, max_work_group_size); return configurations; } // Iterates recursively over all permutations of the user-defined parameters void PopulateConfigurations(const std::vector<Parameter> &parameters, + const std::vector<size_t> local_size_base, + const TransformVector& mul_local_config, + const TransformVector& div_local_config, const size_t index, const Configuration &config, std::vector<Configuration> &configuration, const size_t local_mem_max, const Constraints& constraints, - const LocalMemSizeInfo& local_mem_size_info) { + const LocalMemSizeInfo& local_mem_size_info, + const std::vector<size_t>& max_work_item_sizes, + const size_t max_work_group_size) { // End of the chain: all parameters are considered, store the resulting configuration if it is a // valid one according to the constraints if (index == 
parameters.size()) { - if (ValidConfiguration(config, local_mem_max, constraints, local_mem_size_info)) { + if (ValidConfiguration(config, local_mem_max, constraints, local_mem_size_info, + local_size_base, mul_local_config, div_local_config, + max_work_item_sizes, max_work_group_size)) { configuration.push_back(config); } return; @@ -55,8 +69,10 @@ void PopulateConfigurations(const std::vector<Parameter> &parameters, for (auto &value: parameter.second) { auto config_copy = config; config_copy[parameter.first] = value; - PopulateConfigurations(parameters, index+1, config_copy, configuration, - local_mem_max, constraints, local_mem_size_info); + PopulateConfigurations(parameters, local_size_base, mul_local_config, div_local_config, + index+1, config_copy, configuration, + local_mem_max, constraints, local_mem_size_info, + max_work_item_sizes, max_work_group_size); } } @@ -64,7 +80,12 @@ void PopulateConfigurations(const std::vector<Parameter> &parameters, bool ValidConfiguration(const Configuration &config, const size_t local_mem_max, const Constraints& constraints, - const LocalMemSizeInfo& local_mem_size_info) { + const LocalMemSizeInfo& local_mem_size_info, + const std::vector<size_t> local_size_base, + const TransformVector& mul_local_config, + const TransformVector& div_local_config, + const std::vector<size_t>& max_work_item_sizes, + const size_t max_work_group_size) { // Iterates over all constraints for (auto &constraint: constraints) { @@ -92,6 +113,20 @@ bool ValidConfiguration(const Configuration &config, return false; } + // Checks the local thread size (both per dimension and in total) + const auto local = SetThreadConfiguration(config, local_size_base, + mul_local_config, div_local_config); + for (auto i=size_t{0}; i<local.size(); ++i) { + if (local[i] > max_work_item_sizes[i]) { + return false; + } + } + auto local_size = size_t{1}; + for (auto &item: local) { local_size *= item; } + if (local_size > max_work_group_size) { + return false; + } + // Everything 
was OK: this configuration is valid return true; } |