diff options
Diffstat (limited to 'src/tuning/tuning.hpp')
-rw-r--r-- | src/tuning/tuning.hpp | 147 |
1 files changed, 108 insertions, 39 deletions
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp index 1f9b6f4f..bc9c0e03 100644 --- a/src/tuning/tuning.hpp +++ b/src/tuning/tuning.hpp @@ -18,6 +18,7 @@ #include <vector> #include <string> #include <random> +#include <utility> #include <cltune.h> @@ -26,6 +27,73 @@ namespace clblast { // ================================================================================================= +// Structures for the tuners with all the default settings +struct TunerDefaults { + + // The list of arguments relevant for this routine + std::vector<std::string> options = {}; + + // Default sizes + size_t default_m = 1; + size_t default_n = 1; + size_t default_k = 1; + + // Other defaults + size_t default_batch_count = 1; + size_t default_num_runs = 10; // run every kernel this many times for averaging + + // Search heuristic defaults + double default_fraction = 1.0; + size_t default_swarm_size_PSO = 8; + double default_influence_global_PSO = 0.1; + double default_influence_local_PSO = 0.3; + double default_influence_random_PSO = 0.6; + size_t default_heuristic = static_cast<size_t>(cltune::SearchMethod::FullSearch); + double default_max_temp_ann = 1.0; +}; + +// Structures for the tuners with the remaining settings +struct TunerSettings { + + // The representative kernel and the source code + std::string kernel_family; + std::string kernel_name; + std::string sources; + + // Describes how to obtain the sizes of the buffers + size_t size_x = 1; + size_t size_y = 1; + size_t size_a = 1; + size_t size_b = 1; + size_t size_c = 1; + size_t size_temp = 1; + + // Sets the base thread configuration + std::vector<size_t> global_size = {}; + std::vector<size_t> global_size_ref = {}; + std::vector<size_t> local_size = {}; + std::vector<size_t> local_size_ref = {}; + + // Transforms the thread configuration based on the parameters + using TransformVector = std::vector<std::vector<std::string>>; + TransformVector mul_local = {}; + TransformVector div_local = {}; + TransformVector mul_global = {}; + TransformVector div_global = {}; + + // Sets the tuning parameters and their possible values + std::vector<std::pair<std::string, std::vector<size_t>>> parameters; + + // Describes how to compute the performance metrics + size_t metric_amount = 0; + std::string performance_unit = "N/A"; + + // Returns which search heuristic to use + size_t heuristic = static_cast<size_t>(cltune::SearchMethod::FullSearch); +}; + +// ================================================================================================= + // Function to get command-line argument, set-up the input buffers, configure the tuner, and collect // the results. Used for all types of kernel families. Note that this is a header-only function so // that it is automatically compiled for the various kernels (given as the 'C' template argument). @@ -34,30 +102,31 @@ void Tuner(int argc, char* argv[]) { constexpr auto kSeed = 42; // fixed seed for reproducibility // Sets the parameters and platform/device for which to tune (command-line options) + const TunerDefaults defaults = C::GetTunerDefaults(); auto command_line_args = RetrieveCommandLineArguments(argc, argv); auto help = std::string{"* Options given/available:\n"}; auto args = Arguments<T>{}; args.platform_id = GetArgument(command_line_args, help, kArgPlatform, ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0})); args.device_id = GetArgument(command_line_args, help, kArgDevice, ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0})); args.precision = GetArgument(command_line_args, help, kArgPrecision, Precision::kSingle); - for (auto &o: C::GetOptions()) { - if (o == kArgM) { args.m = GetArgument(command_line_args, help, kArgM, C::DefaultM()); } - if (o == kArgN) { args.n = GetArgument(command_line_args, help, kArgN, C::DefaultN()); } - if (o == kArgK) { args.k = GetArgument(command_line_args, help, kArgK, C::DefaultK()); } + for (auto &o: defaults.options) { + if (o == kArgM) { args.m = GetArgument(command_line_args, help, kArgM, defaults.default_m); } + if (o == kArgN) { args.n = GetArgument(command_line_args, help, kArgN, defaults.default_n); } + if (o == kArgK) { args.k = GetArgument(command_line_args, help, kArgK, defaults.default_k); } if (o == kArgAlpha) { args.alpha = GetArgument(command_line_args, help, kArgAlpha, GetScalar<T>()); } if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<T>()); } - if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, C::DefaultFraction()); } - if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, C::DefaultBatchCount()); } - if (o == kArgHeuristicSelection) {args.heuristic_selection = GetArgument(command_line_args, help, kArgHeuristicSelection, C::DefaultHeuristic()); } - if (o == kArgPsoSwarmSize) {args.pso_swarm_size = GetArgument(command_line_args, help, kArgPsoSwarmSize , C::DefaultSwarmSizePSO()); } - if (o == kArgPsoInfGlobal) {args.pso_inf_global = GetArgument(command_line_args, help, kArgPsoInfGlobal, C::DefaultInfluenceGlobalPSO()); } - if (o == kArgPsoInfLocal) {args.pso_inf_local = GetArgument(command_line_args, help, kArgPsoInfLocal, C::DefaultInfluenceLocalPSO()); } - if (o == kArgPsoInfRandom) {args.pso_inf_random = GetArgument(command_line_args, help, kArgPsoInfRandom, C::DefaultInfluenceRandomPSO()); } - if (o == kArgAnnMaxTemp) {args.ann_max_temperature = GetArgument(command_line_args, help, kArgAnnMaxTemp, C::DefaultMaxTempAnn());} + if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, defaults.default_fraction); } + if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, defaults.default_batch_count); } + if (o == kArgHeuristicSelection) {args.heuristic_selection = GetArgument(command_line_args, help, kArgHeuristicSelection, defaults.default_heuristic); } + if (o == kArgPsoSwarmSize) {args.pso_swarm_size = GetArgument(command_line_args, help, kArgPsoSwarmSize , defaults.default_swarm_size_PSO); } + if (o == kArgPsoInfGlobal) {args.pso_inf_global = GetArgument(command_line_args, help, kArgPsoInfGlobal, defaults.default_influence_global_PSO); } + if (o == kArgPsoInfLocal) {args.pso_inf_local = GetArgument(command_line_args, help, kArgPsoInfLocal, defaults.default_influence_local_PSO); } + if (o == kArgPsoInfRandom) {args.pso_inf_random = GetArgument(command_line_args, help, kArgPsoInfRandom, defaults.default_influence_random_PSO); } + if (o == kArgAnnMaxTemp) {args.ann_max_temperature = GetArgument(command_line_args, help, kArgAnnMaxTemp, defaults.default_max_temp_ann); } } - const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, C::DefaultNumRuns()); - + const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, defaults.default_num_runs); fprintf(stdout, "%s\n", help.c_str()); + const TunerSettings settings = C::GetTunerSettings(args); // Tests validity of the given arguments C::TestValidArguments(args); @@ -87,12 +156,12 @@ void Tuner(int argc, char* argv[]) { } // Creates input buffers with random data - auto x_vec = std::vector<T>(C::GetSizeX(args)); - auto y_vec = std::vector<T>(C::GetSizeY(args)); - auto a_mat = std::vector<T>(C::GetSizeA(args)); - auto b_mat = std::vector<T>(C::GetSizeB(args)); - auto c_mat = std::vector<T>(C::GetSizeC(args)); - auto temp = std::vector<T>(C::GetSizeTemp(args)); + auto x_vec = std::vector<T>(settings.size_x); + auto y_vec = std::vector<T>(settings.size_y); + auto a_mat = std::vector<T>(settings.size_a); + auto b_mat = std::vector<T>(settings.size_b); + auto c_mat = std::vector<T>(settings.size_c); + auto temp = std::vector<T>(settings.size_temp); std::mt19937 mt(kSeed); std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); PopulateVector(x_vec, mt, dist); @@ -105,15 +174,13 @@ void Tuner(int argc, char* argv[]) { // Initializes the tuner for the chosen device cltune::Tuner tuner(args.platform_id, args.device_id); - // Select the search method based on the cmd_line arguments - // If the tuner does not support the selected choice, Full Search will be returned. - auto method = C::GetHeuristic(args); - + // Select the search method based on the command-line arguments + // If the tuner does not support the selected choice, full search will be returned. + auto method = settings.heuristic; if (method == 1) { tuner.UseRandomSearch(1.0/args.fraction); } else if (method == 2) { tuner.UseAnnealing(1.0/args.fraction, args.ann_max_temperature); } - else if (method == 3) { - tuner.UsePSO(1.0/args.fraction, args.pso_swarm_size, args.pso_inf_global, args.pso_inf_local, args.pso_inf_random); - } + else if (method == 3) { tuner.UsePSO(1.0/args.fraction, args.pso_swarm_size, args.pso_inf_global, + args.pso_inf_local, args.pso_inf_random); } else { tuner.UseFullSearch(); } // Set extra settings for specific defines. This mimics src/routine.cc. @@ -127,12 +194,14 @@ void Tuner(int argc, char* argv[]) { } // Loads the kernel sources and defines the kernel to tune - auto sources = defines + C::GetSources(); - auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize()); - tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef()); + auto sources = defines + settings.sources; + auto id = tuner.AddKernelFromString(sources, settings.kernel_name, settings.global_size, settings.local_size); + tuner.SetReferenceFromString(sources, settings.kernel_name, settings.global_size_ref, settings.local_size_ref); // Sets the tunable parameters and their possible values - C::SetParameters(tuner, id); + for (const auto ¶meter: settings.parameters) { + tuner.AddParameter(id, parameter.first, parameter.second); + } C::SetConstraints(tuner, id); C::SetLocalMemorySize(tuner, id, args); @@ -141,10 +210,10 @@ void Tuner(int argc, char* argv[]) { tuner.AddParameterReference("PRECISION", static_cast<size_t>(args.precision)); // Modifies the thread-sizes (both global and local) based on the parameters - for (auto ¶meters: C::MulLocal()) { tuner.MulLocalSize(id, parameters); } - for (auto ¶meters: C::DivLocal()) { tuner.DivLocalSize(id, parameters); } - for (auto ¶meters: C::MulGlobal()) { tuner.MulGlobalSize(id, parameters); } - for (auto ¶meters: C::DivGlobal()) { tuner.DivGlobalSize(id, parameters); } + for (auto ¶meters: settings.mul_local) { tuner.MulLocalSize(id, parameters); } + for (auto ¶meters: settings.div_local) { tuner.DivLocalSize(id, parameters); } + for (auto ¶meters: settings.mul_global) { tuner.MulGlobalSize(id, parameters); } + for (auto ¶meters: settings.div_global) { tuner.DivGlobalSize(id, parameters); } // Sets the function's arguments C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat, temp); @@ -160,20 +229,20 @@ void Tuner(int argc, char* argv[]) { // Also prints the performance of the best-case in terms of GB/s or GFLOPS if (time_ms != 0.0) { printf("[ -------> ] %.2lf ms", time_ms); - printf(" or %.1lf %s\n", C::GetMetric(args)/(time_ms*1.0e6), C::PerformanceUnit().c_str()); + printf(" or %.1lf %s\n", settings.metric_amount/(time_ms*1.0e6), settings.performance_unit.c_str()); } // Outputs the results as JSON to disk, including some meta-data auto precision_string = std::to_string(static_cast<size_t>(args.precision)); auto metadata = std::vector<std::pair<std::string,std::string>>{ - {"kernel_family", C::KernelFamily()}, + {"kernel_family", settings.kernel_family}, {"precision", precision_string}, {"clblast_device_type", device_type}, {"clblast_device_vendor", device_vendor}, {"clblast_device_architecture", device_architecture}, {"clblast_device_name", device_name} }; - for (auto &o: C::GetOptions()) { + for (auto &o: defaults.options) { if (o == kArgM) { metadata.push_back({"arg_m", std::to_string(args.m)}); } if (o == kArgN) { metadata.push_back({"arg_n", std::to_string(args.n)}); } if (o == kArgK) { metadata.push_back({"arg_k", std::to_string(args.k)}); } @@ -181,7 +250,7 @@ void Tuner(int argc, char* argv[]) { if (o == kArgBeta) { metadata.push_back({"arg_beta", ToString(args.beta)}); } if (o == kArgBatchCount) { metadata.push_back({"arg_batch_count", ToString(args.batch_count)}); } } - tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata); + tuner.PrintJSON("clblast_" + settings.kernel_family + "_" + precision_string + ".json", metadata); } |