summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-08-21 20:06:29 +0200
committer GitHub <noreply@github.com> 2017-08-21 20:06:29 +0200
commit e5eb6b1d3a66358093cb40f0fad51ecdc4654771 (patch)
tree e5eb03736240ec07534319fdd15661e1093f04ac /src
parent d67fd6604b4a6584c4f9e856057fcc8076ce377d (diff)
parent dfd332524ab0e66a04d803bb075787e35cd2db1a (diff)
Merge pull request #173 from mcian/PSO_params
Add PSO parameters support and search strategy selection from command…
Diffstat (limited to 'src')
-rw-r--r-- src/routines/level3/xgemm.cpp 6
-rw-r--r-- src/tuning/kernels/copy_fast.cpp 13
-rw-r--r-- src/tuning/kernels/copy_pad.cpp 13
-rw-r--r-- src/tuning/kernels/transpose_fast.cpp 13
-rw-r--r-- src/tuning/kernels/transpose_pad.cpp 13
-rw-r--r-- src/tuning/kernels/xaxpy.cpp 13
-rw-r--r-- src/tuning/kernels/xdot.cpp 13
-rw-r--r-- src/tuning/kernels/xgemm.cpp 30
-rw-r--r-- src/tuning/kernels/xgemm_direct.cpp 24
-rw-r--r-- src/tuning/kernels/xgemv.cpp 13
-rw-r--r-- src/tuning/kernels/xger.cpp 13
-rw-r--r-- src/tuning/tuning.hpp 23
-rw-r--r-- src/utilities/utilities.hpp 14
13 files changed, 178 insertions, 23 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 136eec43..3909c308 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -102,9 +102,9 @@ void Xgemm<T>::DoGemm(const Layout layout,
TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld);
// Selects which version of GEMM to run
- const auto m_n_k = static_cast<unsigned long>(m) * static_cast<unsigned long>(n) *
- static_cast<unsigned long>(k);
- const auto do_gemm_direct = (m_n_k < static_cast<unsigned long>(db_["XGEMM_MIN_INDIRECT_SIZE"]));
+ const auto m_n_k = static_cast<unsigned long long>(m) * static_cast<unsigned long long>(n) *
+ static_cast<unsigned long long>(k);
+ const auto do_gemm_direct = (m_n_k < static_cast<unsigned long long>(db_["XGEMM_MIN_INDIRECT_SIZE"]));
if (do_gemm_direct) { // for small sizes (single kernel)
GemmDirect(m, n, k, alpha,
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp
index d3d12bed..c9bf478c 100644
--- a/src/tuning/kernels/copy_fast.cpp
+++ b/src/tuning/kernels/copy_fast.cpp
@@ -49,7 +49,13 @@ class TuneCopy {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -99,6 +105,11 @@ class TuneCopy {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp
index 0e157364..23f52d75 100644
--- a/src/tuning/kernels/copy_pad.cpp
+++ b/src/tuning/kernels/copy_pad.cpp
@@ -49,7 +49,13 @@ class TunePad {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -107,6 +113,11 @@ class TunePad {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp
index a1ce4220..308663d8 100644
--- a/src/tuning/kernels/transpose_fast.cpp
+++ b/src/tuning/kernels/transpose_fast.cpp
@@ -49,7 +49,13 @@ class TuneTranspose {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -104,6 +110,11 @@ class TuneTranspose {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp
index 490580b5..304702de 100644
--- a/src/tuning/kernels/transpose_pad.cpp
+++ b/src/tuning/kernels/transpose_pad.cpp
@@ -49,7 +49,13 @@ class TunePadTranspose {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -111,6 +117,11 @@ class TunePadTranspose {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp
index a13e54f2..f8e1d93e 100644
--- a/src/tuning/kernels/xaxpy.cpp
+++ b/src/tuning/kernels/xaxpy.cpp
@@ -53,7 +53,13 @@ class TuneXaxpy {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -102,6 +108,11 @@ class TuneXaxpy {
return 3 * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp
index b85c8521..c3b5361e 100644
--- a/src/tuning/kernels/xdot.cpp
+++ b/src/tuning/kernels/xdot.cpp
@@ -49,7 +49,13 @@ class TuneXdot {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -108,6 +114,11 @@ class TuneXdot {
return (V==1) ? (2*args.n + 1) * GetBytes(args.precision) : 1 * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return (V==1) ? "GB/s" : "N/A"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp
index 9b4cea8b..063a3bae 100644
--- a/src/tuning/kernels/xgemm.cpp
+++ b/src/tuning/kernels/xgemm.cpp
@@ -19,6 +19,7 @@
#include "utilities/utilities.hpp"
#include "tuning/tuning.hpp"
+
namespace clblast {
// =================================================================================================
@@ -28,7 +29,8 @@ class TuneXgemm {
public:
// The representative kernel and the source code
- static std::string KernelFamily() { return (V==1) ? "xgemm_1" : "xgemm_2"; }
+ // Name of the kernel family being tuned: "xgemm_1" for V==1 and "xgemm_2" for any other V
+ static std::string KernelFamily() { return (V==1) ? "xgemm_1" : "xgemm_2"; }
static std::string KernelName() { return "Xgemm"; }
static std::string GetSources() {
return
@@ -41,7 +43,9 @@ class TuneXgemm {
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction};
+ return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
+ kArgHeuristicSelection, kArgPsoSwarmSize, // search-method choice and its PSO settings
+ kArgPsoInfGlobal, kArgPsoInfLocal, kArgPsoInfRandom, kArgAnnMaxTemp}; // + annealing setting
}
// Tests for valid arguments
@@ -54,7 +58,13 @@ class TuneXgemm {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 512.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default for the 'pso_swarm_size' argument
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default for the 'pso_inf_global' argument
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default for the 'pso_inf_local' argument
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default for the 'pso_inf_random' argument
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::PSO);} // default for 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default for the 'ann_max_temperature' argument
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -82,6 +92,7 @@ class TuneXgemm {
tuner.AddParameter(id, "SB", {0, 1});
} // a lot more tuning parameters - has to be sampled randomly, too much to test all
else {
+ //RANDOM_SEARCH & PSO
tuner.AddParameter(id, "MWG", {16, 32, 64, 128});
tuner.AddParameter(id, "NWG", {16, 32, 64, 128});
tuner.AddParameter(id, "KWG", {16, 32});
@@ -97,6 +108,7 @@ class TuneXgemm {
tuner.AddParameter(id, "SA", {0, 1});
tuner.AddParameter(id, "SB", {0, 1});
}
+
}
// Sets the constraints
@@ -167,6 +179,18 @@ class TuneXgemm {
return 2 * args.m * args.n * args.k;
}
static std::string PerformanceUnit() { return "GFLOPS"; }
+
+ // Returns which search heuristic to run
+ static size_t GetHeuristic(const Arguments<T> &args){
+   // A fraction of exactly 1.0 or 0.0 requests an exhaustive search; any other value hands the
+   // choice over to the heuristic stored in the arguments.
+   const auto explore_everything = (args.fraction == 1.0 || args.fraction == 0.0);
+   if (explore_everything) {
+     return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+   }
+   // Partial search: use the requested search method as-is
+   return args.heuristic_selection;
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index e7a35b93..3de19496 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -41,7 +41,9 @@ class TuneXgemmDirect {
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction};
+ return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
+ kArgHeuristicSelection, kArgPsoSwarmSize, // search-method choice and its PSO settings
+ kArgPsoInfGlobal, kArgPsoInfLocal, kArgPsoInfRandom, kArgAnnMaxTemp}; // + annealing setting
}
// Tests for valid arguments
@@ -54,7 +56,13 @@ class TuneXgemmDirect {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default for the 'pso_swarm_size' argument
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default for the 'pso_inf_global' argument
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default for the 'pso_inf_local' argument
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default for the 'pso_inf_random' argument
+ static size_t DefaultHeuristic(){ return static_cast<size_t>(cltune::SearchMethod::PSO);} // default for 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default for the 'ann_max_temperature' argument
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -166,6 +174,18 @@ class TuneXgemmDirect {
return 2 * args.m * args.n * args.k;
}
static std::string PerformanceUnit() { return "GFLOPS"; }
+
+ // Returns which search heuristic to run
+ static size_t GetHeuristic(const Arguments<T> &args){
+   // A fraction of exactly 1.0 or 0.0 requests an exhaustive search; any other value hands the
+   // choice over to the heuristic stored in the arguments.
+   const auto explore_everything = (args.fraction == 1.0 || args.fraction == 0.0);
+   if (explore_everything) {
+     return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+   }
+   // Partial search: use the requested search method as-is
+   return args.heuristic_selection;
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp
index 9e9a6fe1..00115b6c 100644
--- a/src/tuning/kernels/xgemv.cpp
+++ b/src/tuning/kernels/xgemv.cpp
@@ -52,7 +52,13 @@ class TuneXgemv {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.m; }
@@ -148,6 +154,11 @@ class TuneXgemv {
return (args.m*args.n + 2*args.m + args.n) * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp
index c82a29b6..14a98761 100644
--- a/src/tuning/kernels/xger.cpp
+++ b/src/tuning/kernels/xger.cpp
@@ -49,7 +49,13 @@ class TuneXger {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultSwarmSizePSO() { return 8; } // default 'pso_swarm_size'; N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; } // default 'pso_inf_global'; N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // default 'pso_inf_local'; N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; } // default 'pso_inf_random'; N/A for this kernel
+ static size_t DefaultHeuristic(){ return static_cast<size_t> (cltune::SearchMethod::FullSearch);} // default 'heuristic'
+ static double DefaultMaxTempAnn(){ return 1.0;} // default 'ann_max_temperature'; N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.m; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -107,6 +113,11 @@ class TuneXger {
return (2*args.m*args.n + args.m + args.n) * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic to run: this kernel always performs a full search
+ static size_t GetHeuristic(const Arguments<T> &){
+   return static_cast<size_t> (cltune::SearchMethod::FullSearch);
+ }
};
// =================================================================================================
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 25504430..6a8039d2 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -48,6 +48,12 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<T>()); }
if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, C::DefaultFraction()); }
if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, C::DefaultBatchCount()); }
+ if (o == kArgHeuristicSelection) {args.heuristic_selection = GetArgument(command_line_args, help, kArgHeuristicSelection, C::DefaultHeuristic()); }
+ if (o == kArgPsoSwarmSize) {args.pso_swarm_size = GetArgument(command_line_args, help, kArgPsoSwarmSize , C::DefaultSwarmSizePSO()); }
+ if (o == kArgPsoInfGlobal) {args.pso_inf_global = GetArgument(command_line_args, help, kArgPsoInfGlobal, C::DefaultInfluenceGlobalPSO()); }
+ if (o == kArgPsoInfLocal) {args.pso_inf_local = GetArgument(command_line_args, help, kArgPsoInfLocal, C::DefaultInfluenceLocalPSO()); }
+ if (o == kArgPsoInfRandom) {args.pso_inf_random = GetArgument(command_line_args, help, kArgPsoInfRandom, C::DefaultInfluenceRandomPSO()); }
+ if (o == kArgAnnMaxTemp) {args.ann_max_temperature = GetArgument(command_line_args, help, kArgAnnMaxTemp, C::DefaultMaxTempAnn());}
}
const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, C::DefaultNumRuns());
@@ -91,14 +97,16 @@ void Tuner(int argc, char* argv[]) {
// Initializes the tuner for the chosen device
cltune::Tuner tuner(args.platform_id, args.device_id);
- // Use full-search to explore all parameter combinations or random-search to search only a part of
- // the parameter values. The fraction is set as a command-line argument.
- if (args.fraction == 1.0 || args.fraction == 0.0) {
- tuner.UseFullSearch();
- }
- else {
- tuner.UseRandomSearch(1.0/args.fraction);
+ // Selects the search method from the command-line arguments: 1 runs a random search, 2 runs
+ // annealing and 3 runs PSO; any other value falls through to a full search.
+ const auto method = C::GetHeuristic(args);
+ // args.fraction is treated as an inverse (1/x of the search space), so every heuristic gets 1.0/fraction
+ if (method == 1) { tuner.UseRandomSearch(1.0/args.fraction); }
+ else if (method == 2) { tuner.UseAnnealing(1.0/args.fraction, args.ann_max_temperature); }
+ else if (method == 3) {
+ tuner.UsePSO(1.0/args.fraction, args.pso_swarm_size, args.pso_inf_global, args.pso_inf_local, args.pso_inf_random);
}
+ else { tuner.UseFullSearch(); }
// Set extra settings for specific defines. This mimics src/routine.cc.
auto defines = std::string{""};
@@ -162,6 +170,7 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgBatchCount) { metadata.push_back({"arg_batch_count", ToString(args.batch_count)}); }
}
tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata);
+
}
// =================================================================================================
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 72997d7f..a9c492f3 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -79,6 +79,14 @@ constexpr auto kArgBatchCount = "batch_num";
// The tuner-specific arguments in string form
constexpr auto kArgFraction = "fraction";
+constexpr auto kArgHeuristicSelection = "heuristic"; // search method, given as a numeric index
+// PSO tuner-specific arguments in string form (swarm size and the three influence factors)
+constexpr auto kArgPsoSwarmSize = "pso_swarm_size";
+constexpr auto kArgPsoInfGlobal = "pso_inf_global";
+constexpr auto kArgPsoInfLocal = "pso_inf_local";
+constexpr auto kArgPsoInfRandom = "pso_inf_random";
+// Annealing tuner-specific arguments in string form (maximum temperature)
+constexpr auto kArgAnnMaxTemp = "ann_max_temperature";
// The common arguments in string form
constexpr auto kArgPlatform = "platform";
@@ -172,7 +180,13 @@ struct Arguments {
size_t ap_size = 1;
size_t scalar_size = 1;
// Tuner-specific arguments
+ size_t heuristic_selection = 0; // search method index
double fraction = 1.0;
+ size_t pso_swarm_size = 8;
+ double pso_inf_global = 0.3; // NOTE(review): the kernel tuners default this to 0.1 - confirm which is intended
+ double pso_inf_local = 0.6; // NOTE(review): the kernel tuners default this to 0.3 - confirm which is intended
+ double pso_inf_random = 0.1; // NOTE(review): the kernel tuners default this to 0.6 - confirm which is intended
+ double ann_max_temperature = 1.0; // TODO: confirm that 1.0 is a sensible default temperature
// Client-specific arguments
int compare_clblas = 1;
int compare_cblas = 1;