summaryrefslogtreecommitdiff
path: root/src/tuning/xgemv.cc
diff options
context:
space:
mode:
authorCNugteren <web@cedricnugteren.nl>2015-06-10 08:44:30 +0200
committerCNugteren <web@cedricnugteren.nl>2015-06-10 08:44:30 +0200
commit85c1db93221bf9d71083c6725a33ccbcd1b61de4 (patch)
treee53e4d62d53cc85c4383bccea12904f27c4ac7bc /src/tuning/xgemv.cc
parent3c17c1c13313022879c8caf289d0f47ea5d7d22d (diff)
Added initial naive version of Xgemv kernel
Diffstat (limited to 'src/tuning/xgemv.cc')
-rw-r--r--src/tuning/xgemv.cc89
1 files changed, 89 insertions, 0 deletions
diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc
new file mode 100644
index 00000000..1ee7c7bf
--- /dev/null
+++ b/src/tuning/xgemv.cc
@@ -0,0 +1,89 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements an auto-tuner to tune the Xgemv OpenCL kernel. It uses the CLTune library.
+//
+// =================================================================================================
+
+#include <string>
+#include <vector>
+#include <stdexcept>
+
+#include "internal/utilities.h"
+#include "internal/tuning.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Configures a CLTune tuner for the Xgemv OpenCL kernel. The kernel source is assembled from the
+// common and the Xgemv kernel files, registered together with a reference run, and then supplied
+// with its tunable parameters, thread-size modifiers and kernel arguments. The order in which the
+// arguments are added is significant and is kept exactly as-is: it presumably has to match the
+// argument list of the kernel (to be confirmed against xgemv.opencl).
+//   args:  problem description; this function reads m, n, alpha, beta and precision from it
+//   a_mat: host data registered as the first input buffer
+//   x_vec: host data registered as the second input buffer
+//   y_vec: host data registered as the output buffer
+//   tuner: the tuner instance which is configured by this function
+template <typename T>
+void XgemvTune(const Arguments<T> &args,
+               const std::vector<T> &a_mat, const std::vector<T> &x_vec, std::vector<T> &y_vec,
+               cltune::Tuner &tuner) {
+
+  // Each kernel file is pulled in by the pre-processor as the initialiser of a string: the included
+  // file is expected to supply both the string literal and the terminating semicolon.
+  const std::string common_part =
+  #include "../src/kernels/common.opencl"
+  const std::string xgemv_part =
+  #include "../src/kernels/xgemv.opencl"
+  auto program = common_part + xgemv_part;
+
+  // Registers the kernel to be tuned as well as the reference to compare against, both using
+  // args.m as first thread-size but with a different second thread-size (1 versus 64)
+  const auto kernel_id = tuner.AddKernelFromString(program, "Xgemv", {args.m}, {1});
+  tuner.SetReferenceFromString(program, "Xgemv", {args.m}, {64});
+
+  // The tunable parameters, each with its list of candidate values
+  tuner.AddParameter(kernel_id, "WGS", {64, 128});
+  tuner.AddParameter(kernel_id, "WPT", {1});
+  tuner.AddParameter(kernel_id, "VW", {1});
+
+  // The precision is not tuned: the same single value is given to the kernel and to the reference
+  const auto precision_id = static_cast<size_t>(args.precision);
+  tuner.AddParameter(kernel_id, "PRECISION", {precision_id});
+  tuner.AddParameterReference("PRECISION", precision_id);
+
+  // Thread-size modifiers: the local size is multiplied by WGS, the global size is divided by
+  // both WPT and VW
+  tuner.MulLocalSize(kernel_id, {"WGS"});
+  tuner.DivGlobalSize(kernel_id, {"WPT"});
+  tuner.DivGlobalSize(kernel_id, {"VW"});
+
+  // Kernel arguments: the sizes, the scalars alpha and beta, and the buffers. The literals 0 and 1
+  // following the x and y buffers are presumably their offset and increment (to be confirmed
+  // against the kernel's signature).
+  tuner.AddArgumentScalar(static_cast<int>(args.m));
+  tuner.AddArgumentScalar(static_cast<int>(args.n));
+  tuner.AddArgumentScalar(args.alpha);
+  tuner.AddArgumentScalar(args.beta);
+  tuner.AddArgumentInput(a_mat);
+  tuner.AddArgumentInput(x_vec);
+  tuner.AddArgumentScalar(0);
+  tuner.AddArgumentScalar(1);
+  tuner.AddArgumentOutput(y_vec);
+  tuner.AddArgumentScalar(0);
+  tuner.AddArgumentScalar(1);
+}
+
+// =================================================================================================
+
+// Selects the data-type to tune for: the precision is obtained from GetPrecision(argc, argv) and
+// the matching XgemvTune instantiation is handed to the common client code (TunerAXY). Half
+// precision is rejected with a std::runtime_error; a precision value which is not listed below
+// results in no action.
+void TunerXgemv(int argc, char *argv[]) {
+  const auto precision = GetPrecision(argc, argv);
+  switch (precision) {
+    case Precision::kSingle:
+      TunerAXY<float>(argc, argv, XgemvTune<float>);
+      return;
+    case Precision::kDouble:
+      TunerAXY<double>(argc, argv, XgemvTune<double>);
+      return;
+    case Precision::kComplexSingle:
+      TunerAXY<float2>(argc, argv, XgemvTune<float2>);
+      return;
+    case Precision::kComplexDouble:
+      TunerAXY<double2>(argc, argv, XgemvTune<double2>);
+      return;
+    case Precision::kHalf:
+      throw std::runtime_error("Unsupported precision mode");
+  }
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// Program entry point (not within the clblast namespace): runs the Xgemv tuner and returns 0
+int main(int argc, char *argv[]) {
+ clblast::TunerXgemv(argc, argv); // exceptions thrown by the tuner are not caught here
+ return 0;
+}
+
+// =================================================================================================