summary | refs | log | tree | commit | diff
path: root/src/tuning
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 10:20:11 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 10:20:11 +0200
commit: 75fe8235f78520fbbfff7c9c035ecd9f1aa3e6f6 (patch)
tree: d49e584504b8192f250d96227aa3be01fbac2c92 /src/tuning
parent: b33bec4a59d9d4d0b2e6a3d7e5f1d6e23d4279cb (diff)
Improved the XgemvFastRot kernel by tiled loading of the input matrix A, enabling better memory performance
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/kernels/xgemv.cpp 20
1 files changed, 15 insertions, 5 deletions
diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp
index 5c187d33..b69e4352 100644
--- a/src/tuning/kernels/xgemv.cpp
+++ b/src/tuning/kernels/xgemv.cpp
@@ -61,8 +61,9 @@ class TuneXgemv {
// Sets the tuning parameters and their possible values
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
- tuner.AddParameter(id, "WGS"+std::to_string(V), {64, 128, 256});
- tuner.AddParameter(id, "WPT"+std::to_string(V), {1, 2, 4});
+ tuner.AddParameter(id, "WGS"+std::to_string(V), {32, 64, 128, 256});
+ if (V==1 || V==2) { tuner.AddParameter(id, "WPT"+std::to_string(V), {1, 2, 4}); }
+ else { tuner.AddParameter(id, "WPT"+std::to_string(V), {1, 2, 4, 8, 16, 32}); }
if (V==2 || V==3) { tuner.AddParameter(id, "VW"+std::to_string(V), {1, 2, 4, 8}); }
}
@@ -74,8 +75,14 @@ class TuneXgemv {
}
}
static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) {
- auto LocalMemorySize = [args] (std::vector<size_t> v) { return v[0]*GetBytes(args.precision); };
- tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGS"+std::to_string(V)});
+ if (V==1 || V==2) {
+ auto LocalMemorySize = [args] (std::vector<size_t> v) { return v[0]*GetBytes(args.precision); };
+ tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGS"+std::to_string(V)});
+ }
+ else {
+ auto LocalMemorySize = [args] (std::vector<size_t> v) { return (v[0]*v[1] + v[1])*GetBytes(args.precision); };
+ tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGS"+std::to_string(V), "WPT"+std::to_string(V)});
+ }
}
// Sets the base thread configuration
@@ -89,7 +96,10 @@ class TuneXgemv {
static TransformVector MulLocal() { return {{"WGS"+std::to_string(V)}}; }
static TransformVector DivLocal() { return {}; }
static TransformVector MulGlobal() { return {}; }
- static TransformVector DivGlobal() { return {{"WPT"+std::to_string(V)}}; }
+ static TransformVector DivGlobal() {
+ if (V==1 || V==2) return {{"WPT"+std::to_string(V)}};
+ return {};
+ }
// Sets the kernel's arguments
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,