diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-10 08:44:30 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-10 08:44:30 +0200 |
commit | 85c1db93221bf9d71083c6725a33ccbcd1b61de4 (patch) | |
tree | e53e4d62d53cc85c4383bccea12904f27c4ac7bc /src/tuning/xgemv.cc | |
parent | 3c17c1c13313022879c8caf289d0f47ea5d7d22d (diff) |
Added initial naive version of Xgemv kernel
Diffstat (limited to 'src/tuning/xgemv.cc')
-rw-r--r-- | src/tuning/xgemv.cc | 89 |
1 file changed, 89 insertions, 0 deletions
diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc new file mode 100644 index 00000000..1ee7c7bf --- /dev/null +++ b/src/tuning/xgemv.cc @@ -0,0 +1,89 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements an auto-tuner to tune the Xgemv OpenCL kernel. It uses the CLTune library. +// +// ================================================================================================= + +#include <string> +#include <vector> +#include <stdexcept> + +#include "internal/utilities.h" +#include "internal/tuning.h" + +namespace clblast { +// ================================================================================================= + +// The Xgemv auto-tuner +template <typename T> +void XgemvTune(const Arguments<T> &args, + const std::vector<T> &a_mat, const std::vector<T> &x_vec, std::vector<T> &y_vec, + cltune::Tuner &tuner) { + + // This points to the Xgemv kernel as found in the CLBlast library + std::string common_source = + #include "../src/kernels/common.opencl" + std::string kernel_source = + #include "../src/kernels/xgemv.opencl" + auto sources = common_source + kernel_source; + auto id = tuner.AddKernelFromString(sources, "Xgemv", {args.m}, {1}); + tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64}); + + // Sets the tunable parameters and their possible values + tuner.AddParameter(id, "WGS", {64, 128}); + tuner.AddParameter(id, "WPT", {1}); + tuner.AddParameter(id, "VW", {1}); + + // Tests for a specific precision + tuner.AddParameter(id, "PRECISION", {static_cast<size_t>(args.precision)}); + tuner.AddParameterReference("PRECISION", 
static_cast<size_t>(args.precision)); + + // Modifies the thread-sizes (local) based on the parameters + tuner.MulLocalSize(id, {"WGS"}); + tuner.DivGlobalSize(id, {"WPT"}); + tuner.DivGlobalSize(id, {"VW"}); + + // Sets the function's arguments + tuner.AddArgumentScalar(static_cast<int>(args.m)); + tuner.AddArgumentScalar(static_cast<int>(args.n)); + tuner.AddArgumentScalar(args.alpha); + tuner.AddArgumentScalar(args.beta); + tuner.AddArgumentInput(a_mat); + tuner.AddArgumentInput(x_vec); + tuner.AddArgumentScalar(0); + tuner.AddArgumentScalar(1); + tuner.AddArgumentOutput(y_vec); + tuner.AddArgumentScalar(0); + tuner.AddArgumentScalar(1); +} + +// ================================================================================================= + +// Main function which calls the common client code with the routine-specific function as argument. +void TunerXgemv(int argc, char *argv[]) { + switch(GetPrecision(argc, argv)) { + case Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case Precision::kSingle: TunerAXY<float>(argc, argv, XgemvTune<float>); break; + case Precision::kDouble: TunerAXY<double>(argc, argv, XgemvTune<double>); break; + case Precision::kComplexSingle: TunerAXY<float2>(argc, argv, XgemvTune<float2>); break; + case Precision::kComplexDouble: TunerAXY<double2>(argc, argv, XgemvTune<double2>); break; + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::TunerXgemv(argc, argv); + return 0; +} + +// ================================================================================================= |