summary | refs | log | tree | commit | diff
path: root/src/tuning
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-10-01 13:45:08 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-10-01 13:45:08 +0200
commit a9d35cf04ceb2ba2185c7520dbff79580abbd785 (patch)
tree d12dabf8c65ee699a78cee8313ccb19377ab2832 /src/tuning
parent 73d135c2cef9763b47d410b125eb8bb89ece8432 (diff)
parent d59e5c570b0bbdb8348d2f9ee6fc5850e606db27 (diff)
Merge branch 'development' into gemm_direct
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/kernels/xgemm.cpp 6
-rw-r--r-- src/tuning/tuning.hpp 18
2 files changed, 14 insertions, 10 deletions
diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp
index 4cb7fd00..1abc5e8a 100644
--- a/src/tuning/kernels/xgemm.cpp
+++ b/src/tuning/kernels/xgemm.cpp
@@ -126,10 +126,10 @@ class TuneXgemm {
// Sets the local memory size
static void SetLocalMemorySize(cltune::Tuner &tuner, const size_t id, const Arguments<T> &args) {
auto LocalMemorySize = [args] (std::vector<size_t> v) {
- return (((v[0]*v[1]*v[2]/v[3]) + (v[4]*v[5]*v[6]/v[7]))*GetBytes(args.precision));
+ return (((v[0]*v[1]*v[2]) + (v[3]*v[4]*v[5]))*GetBytes(args.precision));
};
- tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"SA", "KWG", "MWG", "VWM",
- "SB", "KWG", "NWG", "VWN"});
+ tuner.SetLocalMemoryUsage(id, LocalMemorySize, {"SA", "KWG", "MWG",
+ "SB", "KWG", "NWG"});
}
// Sets the base thread configuration
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 19df5f9a..8fa93efc 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -30,6 +30,7 @@ namespace clblast {
// that it is automatically compiled for the various kernels (given as the 'C' template argument).
template <typename C, typename T>
void Tuner(int argc, char* argv[]) {
+ constexpr auto kSeed = 42; // fixed seed for reproducibility
// Sets the parameters and platform/device for which to tune (command-line options)
auto help = std::string{"* Options given/available:\n"};
@@ -45,6 +46,8 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgBeta) { args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar<T>()); }
if (o == kArgFraction) { args.fraction = GetArgument(argc, argv, help, kArgFraction, C::DefaultFraction()); }
}
+ const auto num_runs = GetArgument(argc, argv, help, kArgNumRuns, size_t{1});
+
fprintf(stdout, "%s\n", help.c_str());
// Tests validity of the given arguments
@@ -73,12 +76,12 @@ void Tuner(int argc, char* argv[]) {
auto b_mat = std::vector<T>(C::GetSizeB(args));
auto c_mat = std::vector<T>(C::GetSizeC(args));
auto temp = std::vector<T>(C::GetSizeTemp(args));
- PopulateVector(x_vec);
- PopulateVector(y_vec);
- PopulateVector(a_mat);
- PopulateVector(b_mat);
- PopulateVector(c_mat);
- PopulateVector(temp);
+ PopulateVector(x_vec, kSeed);
+ PopulateVector(y_vec, kSeed);
+ PopulateVector(a_mat, kSeed);
+ PopulateVector(b_mat, kSeed);
+ PopulateVector(c_mat, kSeed);
+ PopulateVector(temp, kSeed);
// Initializes the tuner for the chosen device
cltune::Tuner tuner(args.platform_id, args.device_id);
@@ -126,6 +129,7 @@ void Tuner(int argc, char* argv[]) {
C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat, temp);
// Starts the tuning process
+ tuner.SetNumRuns(num_runs);
tuner.Tune();
// Prints the results to screen
@@ -134,7 +138,7 @@ void Tuner(int argc, char* argv[]) {
// Also prints the performance of the best-case in terms of GB/s or GFLOPS
if (time_ms != 0.0) {
- printf("[ -------> ] %.1lf ms", time_ms);
+ printf("[ -------> ] %.2lf ms", time_ms);
printf(" or %.1lf %s\n", C::GetMetric(args)/(time_ms*1.0e6), C::PerformanceUnit().c_str());
}