From aa7db4f987360fe1956add9391c6e81aa61b75f3 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sat, 23 Dec 2017 13:34:57 +0100
Subject: Added TRSV block-size tuner

---
 src/tuning/routines/xgemm.cpp |   2 +-
 src/tuning/routines/xtrsv.cpp | 157 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+), 1 deletion(-)
 create mode 100644 src/tuning/routines/xtrsv.cpp

(limited to 'src/tuning')

diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index cf750519..83db6104 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -72,7 +72,7 @@ void TuneXgemm(int argc, char* argv[]) {
     return;
   }
   const auto context = Context(device);
-  const auto queue = Queue(context, device);
+  auto queue = Queue(context, device);
 
   // Buffers
   auto buffers = std::vector<Buffer<T>>{
diff --git a/src/tuning/routines/xtrsv.cpp b/src/tuning/routines/xtrsv.cpp
new file mode 100644
index 00000000..9e8f26fa
--- /dev/null
+++ b/src/tuning/routines/xtrsv.cpp
@@ -0,0 +1,157 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren
+//
+// This file tunes the Xtrsv routine at a high-level: choosing an appropriate block size
+//
+// =================================================================================================
+
+#include <cstdio>
+#include <cstdlib>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "utilities/utilities.hpp"
+#include "tuning/tuning.hpp"
+#include "routines/routines.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+constexpr auto size = size_t{1024}; // 'n' argument
+
+// Calls OverrideParameters to set TRSV_BLOCK_SIZE to 'value' for "TrsvRoutine" on the given device
+// and the precision of T. Throws a RuntimeError when the override does not report success.
+template <typename T>
+void SetBlockSize(const size_t value, const Device &device) {
+  const auto override_status = OverrideParameters(device(), "TrsvRoutine", PrecisionValue<T>(),
+                                                  {{"TRSV_BLOCK_SIZE", value}});
+  if (override_status != StatusCode::kSuccess) {
+    throw RuntimeError("OverrideParameters failed with status " + ToString(override_status));
+  }
+}
+
+// Runs Xtrsv once (row-major, lower-triangular, non-transposed, non-unit diagonal, n = 'size')
+// with the given block size and, if the routine set the event, waits for it to complete.
+template <typename T>
+void RunTrsvRoutine(const size_t block_size, Queue& queue, const std::vector<Buffer<T>>& buffers) {
+  SetBlockSize<T>(block_size, queue.GetDevice());
+  auto event = cl_event{};
+  // The event has to be handed to the routine: when 'nullptr' is passed instead, 'event' is never
+  // set and the wait/release calls below receive a null handle
+  auto routine = Xtrsv<T>(queue, &event);
+  routine.DoTrsv(Layout::kRowMajor, Triangle::kLower, Transpose::kNo, Diagonal::kNonUnit,
+                 size,
+                 buffers[0], 0, size, // A matrix
+                 buffers[1], 0, 1); // X vector
+  if (event != nullptr) { // guard: 'event' is still null if the routine did not set it
+    const auto wait_status = clWaitForEvents(1, &event);
+    clReleaseEvent(event);
+    if (wait_status != CL_SUCCESS) {
+      throw RuntimeError("clWaitForEvents failed with status " + std::to_string(wait_status));
+    }
+  }
+}
+
+// Times the TRSV routine for the candidate block sizes ('from', 'to' and 'step' below) and writes
+// the valid timings plus the fastest block size to a JSON file. T selects the data-type.
+template <typename T>
+void TuneXtrsv(int argc, char* argv[]) {
+  auto command_line_args = RetrieveCommandLineArguments(argc, argv);
+  auto help = std::string{"* Options given/available:\n"};
+  const auto platform_id = GetArgument(command_line_args, help, kArgPlatform, ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}));
+  const auto device_id = GetArgument(command_line_args, help, kArgDevice, ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}));
+  const auto precision = GetArgument(command_line_args, help, kArgPrecision, Precision::kSingle);
+  const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, size_t{10});
+  fprintf(stdout, "%s\n", help.c_str());
+
+  // Values for the block size
+  const auto from = size_t{8};
+  const auto to = size_t{64 + 1};
+  const auto step = size_t{8};
+
+  // OpenCL initialisation
+  const auto platform = Platform(platform_id);
+  const auto device = Device(platform, device_id);
+  if (!PrecisionSupported<T>(device)) {
+    printf("* Unsupported precision, skipping this tuning run\n");
+    return;
+  }
+  const auto context = Context(device);
+  auto queue = Queue(context, device);
+
+  // Buffers
+  auto buffers = std::vector<Buffer<T>>{
+    Buffer<T>(context, size * size),
+    Buffer<T>(context, size)
+  };
+
+  // Performance testing
+  const auto results = TimeRoutine(from, to, step, num_runs, queue, buffers, RunTrsvRoutine<T>);
+
+  // Stores the results in the expected format
+  auto scores = std::vector<TuningResult>();
+  for (const auto &result : results) {
+    if (result.second != -1) {
+      auto tuning_results = Configuration();
+      tuning_results["TRSV_BLOCK_SIZE"] = result.first;
+      tuning_results["PRECISION"] = static_cast<size_t>(precision);
+      scores.emplace_back(TuningResult{"trsv_routine", result.second, tuning_results});
+    }
+  }
+
+  // Computes the best result
+  auto best_time = std::numeric_limits<double>::max();
+  auto best_value = size_t{0};
+  for (const auto &result : results) {
+    if (result.second != -1 && result.second < best_time) {
+      best_time = result.second;
+      best_value = result.first;
+    }
+  }
+  const auto best_string = "TRSV_BLOCK_SIZE=" + ToString(best_value);
+
+  // Outputs the results as JSON to disk, including some meta-data
+  const auto precision_string = std::to_string(static_cast<int>(precision));
+  auto metadata = std::vector<std::pair<std::string, std::string>>{
+    {"kernel_family", "trsv_routine"},
+    {"precision", precision_string},
+    {"arg_n", ToString(size)},
+    {"best_kernel", "trsv_routine"},
+    {"best_time", ToString(best_time)},
+    {"best_parameters", best_string}
+  };
+  PrintTimingsToFileAsJSON("clblast_routine_xtrsv_" + precision_string + ".json",
+                           device, platform, metadata, scores);
+
+  printf("* Completed tuning process\n");
+  printf("\n");
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// Shortcuts to the clblast namespace
+using float2 = clblast::float2;
+using double2 = clblast::double2;
+
+// Main function (not within the clblast namespace)
+int main(int argc, char *argv[]) {
+  const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
+  switch(clblast::GetPrecision(command_line_args)) {
+    case clblast::Precision::kSingle: clblast::TuneXtrsv<float>(argc, argv); break;
+    case clblast::Precision::kDouble: clblast::TuneXtrsv<double>(argc, argv); break;
+    case clblast::Precision::kComplexSingle: clblast::TuneXtrsv<float2>(argc, argv); break;
+    case clblast::Precision::kComplexDouble: clblast::TuneXtrsv<double2>(argc, argv); break;
+  }
+  return 0;
+}
+
+// =================================================================================================
-- 
cgit v1.2.3