diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-01 16:58:53 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-10-01 16:58:53 +0200 |
commit | a45992010591bfbf46fdc99496e68982cad163b9 (patch) | |
tree | bba64ae61b8fddad4a4d4529462ac3dab213ca85 /src/tuning/kernels/xgemm_direct.cpp | |
parent | ecc704cc76625fa0601b06ce5246831a14f18c8a (diff) |
Added padding to the local memory of the GEMM direct kernel
Diffstat (limited to 'src/tuning/kernels/xgemm_direct.cpp')
-rw-r--r-- | src/tuning/kernels/xgemm_direct.cpp | 10 |
1 file changed, 7 insertions, 3 deletions
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp index 98714da8..6ab6d1f0 100644 --- a/src/tuning/kernels/xgemm_direct.cpp +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -71,6 +71,8 @@ class TuneXgemmDirect { tuner.AddParameter(id, "KWID", {2}); tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + tuner.AddParameter(id, "PADA", {1}); + tuner.AddParameter(id, "PADB", {1}); } // a lot more tuning parameters - has to be sampled randomly, too much to test all else { tuner.AddParameter(id, "WGD", {8, 16, 32, 64, 128}); @@ -81,6 +83,8 @@ class TuneXgemmDirect { tuner.AddParameter(id, "KWID", {2, 8, 16}); tuner.AddParameter(id, "VWMD", {1, 2, 4, 8}); tuner.AddParameter(id, "VWND", {1, 2, 4, 8}); + tuner.AddParameter(id, "PADA", {0, 1}); + tuner.AddParameter(id, "PADB", {0, 1}); } } @@ -112,9 +116,9 @@ class TuneXgemmDirect { // Sets the local memory size static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) { auto LocalMemorySize = [args] (std::vector<size_t> v) { - return ((v[0]*v[1] + v[2]*v[3])*GetBytes(args.precision)); + return ((v[0]*(v[0] + v[1]) + v[0]*(v[0] + v[2]))*GetBytes(args.precision)); }; - tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "WGD", "WGD", "WGD"}); + tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"WGD", "PADA", "PADB"}); } // Sets the base thread configuration @@ -150,7 +154,7 @@ class TuneXgemmDirect { tuner.AddArgumentScalar(0); // c_offset tuner.AddArgumentScalar(static_cast<int>(args.n)); // c_ld tuner.AddArgumentScalar(1); // a_do_transpose - tuner.AddArgumentScalar(1); // b_do_transpose + tuner.AddArgumentScalar(0); // b_do_transpose tuner.AddArgumentScalar(1); // c_do_transpose tuner.AddArgumentScalar(0); // a_conjugate tuner.AddArgumentScalar(0); // b_conjugate |