summary | refs | log | tree | commit | diff
path: root/src/tuning/kernels/xgemm_direct.cpp
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-10-01 16:58:53 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-10-01 16:58:53 +0200
commit: a45992010591bfbf46fdc99496e68982cad163b9 (patch)
tree: bba64ae61b8fddad4a4d4529462ac3dab213ca85 /src/tuning/kernels/xgemm_direct.cpp
parent: ecc704cc76625fa0601b06ce5246831a14f18c8a (diff)
Added padding to the local memory of the GEMM direct kernel
Diffstat (limited to 'src/tuning/kernels/xgemm_direct.cpp')
-rw-r--r-- src/tuning/kernels/xgemm_direct.cpp 10
1 files changed, 7 insertions, 3 deletions
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index 98714da8..6ab6d1f0 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -71,6 +71,8 @@ class TuneXgemmDirect {
tuner.AddParameter(id, "KWID", {2});
tuner.AddParameter(id, "VWMD", {1, 2, 4, 8});
tuner.AddParameter(id, "VWND", {1, 2, 4, 8});
+ tuner.AddParameter(id, "PADA", {1});
+ tuner.AddParameter(id, "PADB", {1});
} // a lot more tuning parameters - has to be sampled randomly, too much to test all
else {
tuner.AddParameter(id, "WGD", {8, 16, 32, 64, 128});
@@ -81,6 +83,8 @@ class TuneXgemmDirect {
tuner.AddParameter(id, "KWID", {2, 8, 16});
tuner.AddParameter(id, "VWMD", {1, 2, 4, 8});
tuner.AddParameter(id, "VWND", {1, 2, 4, 8});
+ tuner.AddParameter(id, "PADA", {0, 1});
+ tuner.AddParameter(id, "PADB", {0, 1});
}
}
@@ -112,9 +116,9 @@ class TuneXgemmDirect {
// Sets the local memory size
static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) {
auto LocalMemorySize = [args] (std::vector<size_t> v) {
- return ((v[0]*v[1] + v[2]*v[3])*GetBytes(args.precision));
+ return ((v[0]*(v[0] + v[1]) + v[0]*(v[0] + v[2]))*GetBytes(args.precision));
};
- tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "WGD", "WGD", "WGD"});
+ tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "PADA", "PADB"});
}
// Sets the base thread configuration
@@ -150,7 +154,7 @@ class TuneXgemmDirect {
tuner.AddArgumentScalar(0); // c_offset
tuner.AddArgumentScalar(static_cast<int>(args.n)); // c_ld
tuner.AddArgumentScalar(1); // a_do_transpose
- tuner.AddArgumentScalar(1); // b_do_transpose
+ tuner.AddArgumentScalar(0); // b_do_transpose
tuner.AddArgumentScalar(1); // c_do_transpose
tuner.AddArgumentScalar(0); // a_conjugate
tuner.AddArgumentScalar(0); // b_conjugate