summary | refs | log | tree | commit | diff
path: root/src/tuning
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-03-14 20:29:51 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-03-14 20:29:51 +0100
commit: 11bb30e72bf1f2f36380c0bae8593d2e27ce3bfe (patch)
tree: 9cd9c61e6f26fc8c869b58ad72bfea425a4f6b1a /src/tuning
parent: 068ff32e9f8094bf848cbc5bd250a8f25776960e (diff)
Added the possibility to tune batched kernels
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/kernels/copy_fast.cpp | 1
-rw-r--r-- src/tuning/kernels/copy_pad.cpp | 1
-rw-r--r-- src/tuning/kernels/transpose_fast.cpp | 1
-rw-r--r-- src/tuning/kernels/transpose_pad.cpp | 1
-rw-r--r-- src/tuning/kernels/xaxpy.cpp | 1
-rw-r--r-- src/tuning/kernels/xdot.cpp | 1
-rw-r--r-- src/tuning/kernels/xgemm.cpp | 1
-rw-r--r-- src/tuning/kernels/xgemm_direct.cpp | 1
-rw-r--r-- src/tuning/kernels/xgemv.cpp | 1
-rw-r--r-- src/tuning/kernels/xger.cpp | 1
-rw-r--r-- src/tuning/tuning.hpp | 2
11 files changed, 12 insertions, 0 deletions
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp
index 7a434513..10ef864b 100644
--- a/src/tuning/kernels/copy_fast.cpp
+++ b/src/tuning/kernels/copy_fast.cpp
@@ -46,6 +46,7 @@ class TuneCopy {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp
index 94d9c303..1feb5683 100644
--- a/src/tuning/kernels/copy_pad.cpp
+++ b/src/tuning/kernels/copy_pad.cpp
@@ -46,6 +46,7 @@ class TunePad {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp
index e16ab235..433f9972 100644
--- a/src/tuning/kernels/transpose_fast.cpp
+++ b/src/tuning/kernels/transpose_fast.cpp
@@ -46,6 +46,7 @@ class TuneTranspose {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp
index c01298bf..d7dc585d 100644
--- a/src/tuning/kernels/transpose_pad.cpp
+++ b/src/tuning/kernels/transpose_pad.cpp
@@ -46,6 +46,7 @@ class TunePadTranspose {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp
index 824ab29e..23132c51 100644
--- a/src/tuning/kernels/xaxpy.cpp
+++ b/src/tuning/kernels/xaxpy.cpp
@@ -50,6 +50,7 @@ class TuneXaxpy {
static size_t DefaultM() { return 1; } // N/A for this kernel
static size_t DefaultN() { return 4096*1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp
index f871d42a..faf52089 100644
--- a/src/tuning/kernels/xdot.cpp
+++ b/src/tuning/kernels/xdot.cpp
@@ -46,6 +46,7 @@ class TuneXdot {
static size_t DefaultM() { return 1; } // N/A for this kernel
static size_t DefaultN() { return 2*1024*1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp
index f55eadd8..d34035f4 100644
--- a/src/tuning/kernels/xgemm.cpp
+++ b/src/tuning/kernels/xgemm.cpp
@@ -51,6 +51,7 @@ class TuneXgemm {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1024; }
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 512.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index ee5bcb7e..5afcdd38 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -51,6 +51,7 @@ class TuneXgemmDirect {
static size_t DefaultM() { return 256; }
static size_t DefaultN() { return 256; }
static size_t DefaultK() { return 256; }
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp
index 97a45225..c34e8a1c 100644
--- a/src/tuning/kernels/xgemv.cpp
+++ b/src/tuning/kernels/xgemv.cpp
@@ -49,6 +49,7 @@ class TuneXgemv {
static size_t DefaultM() { return 2048; }
static size_t DefaultN() { return 2048; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp
index 5057492f..c3fc243b 100644
--- a/src/tuning/kernels/xger.cpp
+++ b/src/tuning/kernels/xger.cpp
@@ -46,6 +46,7 @@ class TuneXger {
static size_t DefaultM() { return 1024; }
static size_t DefaultN() { return 1024; }
static size_t DefaultK() { return 1; } // N/A for this kernel
+ static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 7060fc9f..25504430 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -47,6 +47,7 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgAlpha) { args.alpha = GetArgument(command_line_args, help, kArgAlpha, GetScalar<T>()); }
if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<T>()); }
if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, C::DefaultFraction()); }
+ if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, C::DefaultBatchCount()); }
}
const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, C::DefaultNumRuns());
@@ -158,6 +159,7 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgK) { metadata.push_back({"arg_k", std::to_string(args.k)}); }
if (o == kArgAlpha) { metadata.push_back({"arg_alpha", ToString(args.alpha)}); }
if (o == kArgBeta) { metadata.push_back({"arg_beta", ToString(args.beta)}); }
+ if (o == kArgBatchCount) { metadata.push_back({"arg_batch_count", ToString(args.batch_count)}); }
}
tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata);
}