diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-14 20:29:51 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-14 20:29:51 +0100 |
commit | 11bb30e72bf1f2f36380c0bae8593d2e27ce3bfe (patch) | |
tree | 9cd9c61e6f26fc8c869b58ad72bfea425a4f6b1a | |
parent | 068ff32e9f8094bf848cbc5bd250a8f25776960e (diff) |
Added the possibility to tune batched kernels
-rw-r--r-- | src/tuning/kernels/copy_fast.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/copy_pad.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_fast.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_pad.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xaxpy.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xdot.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm_direct.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xgemv.cpp | 1 | ||||
-rw-r--r-- | src/tuning/kernels/xger.cpp | 1 | ||||
-rw-r--r-- | src/tuning/tuning.hpp | 2 |
11 files changed, 12 insertions, 0 deletions
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp index 7a434513..10ef864b 100644 --- a/src/tuning/kernels/copy_fast.cpp +++ b/src/tuning/kernels/copy_fast.cpp @@ -46,6 +46,7 @@ class TuneCopy { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp index 94d9c303..1feb5683 100644 --- a/src/tuning/kernels/copy_pad.cpp +++ b/src/tuning/kernels/copy_pad.cpp @@ -46,6 +46,7 @@ class TunePad { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp index e16ab235..433f9972 100644 --- a/src/tuning/kernels/transpose_fast.cpp +++ b/src/tuning/kernels/transpose_fast.cpp @@ -46,6 +46,7 @@ class TuneTranspose { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp index c01298bf..d7dc585d 100644 --- a/src/tuning/kernels/transpose_pad.cpp +++ b/src/tuning/kernels/transpose_pad.cpp @@ -46,6 +46,7 @@ class TunePadTranspose { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp index 824ab29e..23132c51 100644 --- a/src/tuning/kernels/xaxpy.cpp +++ b/src/tuning/kernels/xaxpy.cpp @@ -50,6 +50,7 @@ class TuneXaxpy { static size_t DefaultM() { return 1; } // N/A for this kernel static size_t DefaultN() { return 4096*1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp index f871d42a..faf52089 100644 --- a/src/tuning/kernels/xdot.cpp +++ b/src/tuning/kernels/xdot.cpp @@ -46,6 +46,7 @@ class TuneXdot { static size_t DefaultM() { return 1; } // N/A for this kernel static size_t DefaultN() { return 2*1024*1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp index f55eadd8..d34035f4 100644 --- a/src/tuning/kernels/xgemm.cpp +++ b/src/tuning/kernels/xgemm.cpp @@ -51,6 +51,7 @@ class TuneXgemm { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1024; } + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return (V==1) ? 1.0 : 512.0; } // test all or sample randomly static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp index ee5bcb7e..5afcdd38 100644 --- a/src/tuning/kernels/xgemm_direct.cpp +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -51,6 +51,7 @@ class TuneXgemmDirect { static size_t DefaultM() { return 256; } static size_t DefaultN() { return 256; } static size_t DefaultK() { return 256; } + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp index 97a45225..c34e8a1c 100644 --- a/src/tuning/kernels/xgemv.cpp +++ b/src/tuning/kernels/xgemv.cpp @@ -49,6 +49,7 @@ class TuneXgemv { static size_t DefaultM() { return 2048; } static size_t DefaultN() { return 2048; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp index 5057492f..c3fc243b 100644 --- a/src/tuning/kernels/xger.cpp +++ b/src/tuning/kernels/xger.cpp @@ -46,6 +46,7 @@ class TuneXger { static size_t DefaultM() { return 1024; } static size_t DefaultN() { return 1024; } static size_t DefaultK() { return 1; } // N/A for this kernel + static size_t DefaultBatchCount() { return 1; } // N/A for this kernel static double DefaultFraction() { return 1.0; } // N/A for this kernel static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp index 7060fc9f..25504430 100644 --- a/src/tuning/tuning.hpp +++ b/src/tuning/tuning.hpp @@ -47,6 +47,7 @@ void Tuner(int argc, char* argv[]) { if (o == kArgAlpha) { args.alpha = GetArgument(command_line_args, help, kArgAlpha, GetScalar<T>()); } if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<T>()); } if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, C::DefaultFraction()); } + if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, C::DefaultBatchCount()); } } const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, C::DefaultNumRuns()); @@ -158,6 +159,7 @@ void Tuner(int argc, char* argv[]) { if (o == kArgK) { metadata.push_back({"arg_k", std::to_string(args.k)}); } if (o == kArgAlpha) { metadata.push_back({"arg_alpha", ToString(args.alpha)}); } if (o == kArgBeta) { metadata.push_back({"arg_beta", ToString(args.beta)}); } + if (o == kArgBatchCount) { metadata.push_back({"arg_batch_count", ToString(args.batch_count)}); } } tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata); } |