summary refs log tree commit diff
path: root/src/tuning
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-03-21 22:58:37 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-03-21 22:58:37 +0100
commit 7a2371213bb900c8c726c346f85920199f424d82 (patch)
tree de3562f21a76667f1af865905cfc84d8c20a7039 /src/tuning
parent 52791bf3553bb47a50dea4ac234f7e1b09c4383c (diff)
Re-added support for local memory size constraint checking in the tuner
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/configurations.cpp 34
-rw-r--r-- src/tuning/configurations.hpp 21
-rw-r--r-- src/tuning/tuning.cpp 14
-rw-r--r-- src/tuning/tuning.hpp 4
-rw-r--r-- src/tuning/tuning_api.cpp 14
5 files changed, 64 insertions, 23 deletions
diff --git a/src/tuning/configurations.cpp b/src/tuning/configurations.cpp
index 459d66b1..1fe232cf 100644
--- a/src/tuning/configurations.cpp
+++ b/src/tuning/configurations.cpp
@@ -21,11 +21,15 @@ namespace clblast {
// =================================================================================================
// Finds all configurations. It also applies the user-defined constraints within.
-std::vector<Configuration> SetConfigurations(const std::vector<Parameter> parameters,
- const Constraints& constraints) {
+std::vector<Configuration> SetConfigurations(const Device& device,
+ const std::vector<Parameter> parameters,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info) {
+ const auto local_mem_max = device.LocalMemSize();
auto config = Configuration();
auto configurations = std::vector<Configuration>();
- PopulateConfigurations(parameters, 0, config, configurations, constraints);
+ PopulateConfigurations(parameters, 0, config, configurations,
+ local_mem_max, constraints, local_mem_size_info);
return configurations;
}
@@ -33,12 +37,14 @@ std::vector<Configuration> SetConfigurations(const std::vector<Parameter> parame
void PopulateConfigurations(const std::vector<Parameter> &parameters,
const size_t index, const Configuration &config,
std::vector<Configuration> &configuration,
- const Constraints& constraints) {
+ const size_t local_mem_max,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info) {
// End of the chain: all parameters are considered, store the resulting configuration if it is a
// valid one according to the constraints
if (index == parameters.size()) {
- if (ValidConfiguration(config, constraints)) {
+ if (ValidConfiguration(config, local_mem_max, constraints, local_mem_size_info)) {
configuration.push_back(config);
}
return;
@@ -49,13 +55,16 @@ void PopulateConfigurations(const std::vector<Parameter> &parameters,
for (auto &value: parameter.second) {
auto config_copy = config;
config_copy[parameter.first] = value;
- PopulateConfigurations(parameters, index+1, config_copy, configuration, constraints);
+ PopulateConfigurations(parameters, index+1, config_copy, configuration,
+ local_mem_max, constraints, local_mem_size_info);
}
}
// Loops over all user-defined constraints to check whether or not the configuration is valid
bool ValidConfiguration(const Configuration &config,
- const Constraints& constraints) {
+ const size_t local_mem_max,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info) {
// Iterates over all constraints
for (auto &constraint: constraints) {
@@ -72,6 +81,17 @@ bool ValidConfiguration(const Configuration &config,
}
}
+ // Finds the values of the local memory parameters
+ auto local_mem_values = std::vector<size_t>(local_mem_size_info.parameters.size());
+ for (auto i=size_t{0}; i<local_mem_size_info.parameters.size(); ++i) {
+ local_mem_values[i] = config.at(local_mem_size_info.parameters[i]);
+ }
+
+ // Checks the local memory size
+ if (local_mem_size_info.local_mem_size(local_mem_values) > local_mem_max) {
+ return false;
+ }
+
// Everything was OK: this configuration is valid
return true;
}
diff --git a/src/tuning/configurations.hpp b/src/tuning/configurations.hpp
index 74679ff6..faa5498f 100644
--- a/src/tuning/configurations.hpp
+++ b/src/tuning/configurations.hpp
@@ -37,12 +37,21 @@ struct Constraint {
};
using Constraints = std::vector<Constraint>;
+// As above, but for local memory size
+using LocalMemSizeFunction = std::function<size_t(std::vector<size_t>)>;
+struct LocalMemSizeInfo {
+ LocalMemSizeFunction local_mem_size;
+ std::vector<std::string> parameters;
+};
+
// =================================================================================================
// Initializes an empty configuration (vector of name/value pairs) and kicks-off the recursive
// function to find all configurations. It also applies the user-defined constraints within.
-std::vector<Configuration> SetConfigurations(const std::vector<Parameter> parameters,
- const Constraints& constraints);
+std::vector<Configuration> SetConfigurations(const Device& device,
+ const std::vector<Parameter> parameters,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info);
// Iterates recursively over all permutations of the user-defined parameters. This code creates
// multiple chains, in which each chain selects a unique combination of values for all parameters.
@@ -51,14 +60,18 @@ std::vector<Configuration> SetConfigurations(const std::vector<Parameter> parame
void PopulateConfigurations(const std::vector<Parameter> &parameters,
const size_t index, const Configuration &config,
std::vector<Configuration> &configuration,
- const Constraints& constraints);
+ const size_t local_mem_max,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info);
// Loops over all user-defined constraints to check whether or not the configuration is valid.
// Assumes initially all configurations are valid, then returns false if one of the constraints has
// not been met. Constraints consist of a user-defined function and a list of parameter names, which
// are replaced by parameter values in this function.
bool ValidConfiguration(const Configuration &config,
- const Constraints& constraints);
+ const size_t local_mem_max,
+ const Constraints& constraints,
+ const LocalMemSizeInfo& local_mem_size_info);
// Processes multipliers and dividers to obtain the final thread configuration
std::vector<size_t> SetThreadConfiguration(const Configuration& config,
diff --git a/src/tuning/tuning.cpp b/src/tuning/tuning.cpp
index b5e01f65..dd4a83e6 100644
--- a/src/tuning/tuning.cpp
+++ b/src/tuning/tuning.cpp
@@ -93,6 +93,7 @@ void Tuner(int argc, char* argv[], const int V,
GetTunerSettingsFunc<T> GetTunerSettings,
TestValidArgumentsFunc<T> TestValidArguments,
SetConstraintsFunc SetConstraints,
+ ComputeLocalMemSizeFunc<T> ComputeLocalMemSize,
SetArgumentsFunc<T> SetArguments) {
constexpr auto kSeed = 42; // fixed seed for reproducibility
@@ -171,7 +172,8 @@ void Tuner(int argc, char* argv[], const int V,
}
// Sets the tunable parameters and their possible values
- auto configurations = SetConfigurations(settings.parameters, SetConstraints(V));
+ auto configurations = SetConfigurations(device, settings.parameters,
+ SetConstraints(V), ComputeLocalMemSize(V));
printf("* Found %s%zu configuration(s)%s\n",
kPrintMessage.c_str(), configurations.size(), kPrintEnd.c_str());
@@ -380,11 +382,11 @@ void Tuner(int argc, char* argv[], const int V,
}
// Compiles the above function
-template void Tuner<half>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<half> GetTunerSettings, TestValidArgumentsFunc<half> TestValidArguments, SetConstraintsFunc SetConstraints, SetArgumentsFunc<half> SetArguments);
-template void Tuner<float>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<float> GetTunerSettings, TestValidArgumentsFunc<float> TestValidArguments, SetConstraintsFunc SetConstraints, SetArgumentsFunc<float> SetArguments);
-template void Tuner<double>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<double> GetTunerSettings, TestValidArgumentsFunc<double> TestValidArguments, SetConstraintsFunc SetConstraints, SetArgumentsFunc<double> SetArguments);
-template void Tuner<float2>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<float2> GetTunerSettings, TestValidArgumentsFunc<float2> TestValidArguments, SetConstraintsFunc SetConstraints, SetArgumentsFunc<float2> SetArguments);
-template void Tuner<double2>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<double2> GetTunerSettings, TestValidArgumentsFunc<double2> TestValidArguments, SetConstraintsFunc SetConstraints, SetArgumentsFunc<double2> SetArguments);
+template void Tuner<half>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<half> GetTunerSettings, TestValidArgumentsFunc<half> TestValidArguments, SetConstraintsFunc SetConstraints, ComputeLocalMemSizeFunc<half> ComputeLocalMemSize, SetArgumentsFunc<half> SetArguments);
+template void Tuner<float>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<float> GetTunerSettings, TestValidArgumentsFunc<float> TestValidArguments, SetConstraintsFunc SetConstraints, ComputeLocalMemSizeFunc<float> ComputeLocalMemSize, SetArgumentsFunc<float> SetArguments);
+template void Tuner<double>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<double> GetTunerSettings, TestValidArgumentsFunc<double> TestValidArguments, SetConstraintsFunc SetConstraints, ComputeLocalMemSizeFunc<double> ComputeLocalMemSize, SetArgumentsFunc<double> SetArguments);
+template void Tuner<float2>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<float2> GetTunerSettings, TestValidArgumentsFunc<float2> TestValidArguments, SetConstraintsFunc SetConstraints, ComputeLocalMemSizeFunc<float2> ComputeLocalMemSize, SetArgumentsFunc<float2> SetArguments);
+template void Tuner<double2>(int argc, char* argv[], const int V, GetTunerDefaultsFunc GetTunerDefaults, GetTunerSettingsFunc<double2> GetTunerSettings, TestValidArgumentsFunc<double2> TestValidArguments, SetConstraintsFunc SetConstraints, ComputeLocalMemSizeFunc<double2> ComputeLocalMemSize, SetArgumentsFunc<double2> SetArguments);
// =================================================================================================
} // namespace clblast
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index cbecc300..37a042ff 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -108,6 +108,8 @@ template <typename T>
using TestValidArgumentsFunc = std::function<void(const int V, const Arguments<T> &args)>;
using SetConstraintsFunc = std::function<std::vector<Constraint>(const int V)>;
template <typename T>
+using ComputeLocalMemSizeFunc = std::function<LocalMemSizeInfo(const int V)>;
+template <typename T>
using SetArgumentsFunc = std::function<void(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers)>;
// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
@@ -119,6 +121,7 @@ void Tuner(int argc, char* argv[], const int V,
GetTunerSettingsFunc<T> GetTunerSettings,
TestValidArgumentsFunc<T> TestValidArguments,
SetConstraintsFunc SetConstraints,
+ ComputeLocalMemSizeFunc<T> ComputeLocalMemSize,
SetArgumentsFunc<T> SetArguments);
// Function to run the tuners through the CLBlast API, no I/O
@@ -128,6 +131,7 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V,
const GetTunerSettingsFunc<T> GetTunerSettings,
const TestValidArgumentsFunc<T> TestValidArguments,
const SetConstraintsFunc SetConstraints,
+ const ComputeLocalMemSizeFunc<T> ComputeLocalMemSize,
const SetArgumentsFunc<T> SetArguments,
std::unordered_map<std::string,size_t> &parameters);
diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp
index d03b428c..0c67d50b 100644
--- a/src/tuning/tuning_api.cpp
+++ b/src/tuning/tuning_api.cpp
@@ -206,6 +206,7 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V,
const GetTunerSettingsFunc<T> GetTunerSettings,
const TestValidArgumentsFunc<T> TestValidArguments,
const SetConstraintsFunc SetConstraints,
+ const ComputeLocalMemSizeFunc<T> ComputeLocalMemSize,
const SetArgumentsFunc<T> SetArguments,
std::unordered_map<std::string,size_t> &parameters) {
@@ -260,7 +261,8 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V,
}
// Sets the tunable parameters and their possible values
- auto configurations = SetConfigurations(settings.parameters, SetConstraints(V));
+ auto configurations = SetConfigurations(device, settings.parameters,
+ SetConstraints(V), ComputeLocalMemSize(V));
// Select the search method (full search or a random fraction)
if (args.fraction != 0.0 && args.fraction != 1.0) {
@@ -375,11 +377,11 @@ StatusCode TunerAPI(Queue &queue, const Arguments<T> &args, const int V,
}
// Compiles the above function
-template StatusCode TunerAPI<half>(Queue &queue, const Arguments<half> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<half> GetTunerSettings, const TestValidArgumentsFunc<half> TestValidArguments, const SetConstraintsFunc SetConstraints, const SetArgumentsFunc<half> SetArguments, std::unordered_map<std::string,size_t>&);
-template StatusCode TunerAPI<float>(Queue &queue, const Arguments<float> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<float> GetTunerSettings, const TestValidArgumentsFunc<float> TestValidArguments, const SetConstraintsFunc SetConstraints, const SetArgumentsFunc<float> SetArguments, std::unordered_map<std::string,size_t>&);
-template StatusCode TunerAPI<double>(Queue &queue, const Arguments<double> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<double> GetTunerSettings, const TestValidArgumentsFunc<double> TestValidArguments, const SetConstraintsFunc SetConstraints, const SetArgumentsFunc<double> SetArguments, std::unordered_map<std::string,size_t>&);
-template StatusCode TunerAPI<float2>(Queue &queue, const Arguments<float2> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<float2> GetTunerSettings, const TestValidArgumentsFunc<float2> TestValidArguments, const SetConstraintsFunc SetConstraints, const SetArgumentsFunc<float2> SetArguments, std::unordered_map<std::string,size_t>&);
-template StatusCode TunerAPI<double2>(Queue &queue, const Arguments<double2> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<double2> GetTunerSettings, const TestValidArgumentsFunc<double2> TestValidArguments, const SetConstraintsFunc SetConstraints, const SetArgumentsFunc<double2> SetArguments, std::unordered_map<std::string,size_t>&);
+template StatusCode TunerAPI<half>(Queue &queue, const Arguments<half> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<half> GetTunerSettings, const TestValidArgumentsFunc<half> TestValidArguments, const SetConstraintsFunc SetConstraints, const ComputeLocalMemSizeFunc<half> ComputeLocalMemSize, const SetArgumentsFunc<half> SetArguments, std::unordered_map<std::string,size_t>&);
+template StatusCode TunerAPI<float>(Queue &queue, const Arguments<float> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<float> GetTunerSettings, const TestValidArgumentsFunc<float> TestValidArguments, const SetConstraintsFunc SetConstraints, const ComputeLocalMemSizeFunc<float> ComputeLocalMemSize, const SetArgumentsFunc<float> SetArguments, std::unordered_map<std::string,size_t>&);
+template StatusCode TunerAPI<double>(Queue &queue, const Arguments<double> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<double> GetTunerSettings, const TestValidArgumentsFunc<double> TestValidArguments, const SetConstraintsFunc SetConstraints, const ComputeLocalMemSizeFunc<double> ComputeLocalMemSize, const SetArgumentsFunc<double> SetArguments, std::unordered_map<std::string,size_t>&);
+template StatusCode TunerAPI<float2>(Queue &queue, const Arguments<float2> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<float2> GetTunerSettings, const TestValidArgumentsFunc<float2> TestValidArguments, const SetConstraintsFunc SetConstraints, const ComputeLocalMemSizeFunc<float2> ComputeLocalMemSize, const SetArgumentsFunc<float2> SetArguments, std::unordered_map<std::string,size_t>&);
+template StatusCode TunerAPI<double2>(Queue &queue, const Arguments<double2> &args, const int V, const GetTunerDefaultsFunc GetTunerDefaults, const GetTunerSettingsFunc<double2> GetTunerSettings, const TestValidArgumentsFunc<double2> TestValidArguments, const SetConstraintsFunc SetConstraints, const ComputeLocalMemSizeFunc<double2> ComputeLocalMemSize, const SetArgumentsFunc<double2> SetArguments, std::unordered_map<std::string,size_t>&);
// =================================================================================================
} // namespace clblast