summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormcian <mcian86@gmail.com>2017-07-23 14:48:13 +0200
committermcian <mcian86@gmail.com>2017-07-23 14:48:13 +0200
commit473e81471895b35dcec5cb82e6beba134c544006 (patch)
treee4ff6df062b45644bc0ca8c0fb7640864128ad2f
parent8131e68664e02c8a1bc5a0f5598294fd3bc5b974 (diff)
Code refactoring
-rw-r--r--src/tuning/kernels/copy_fast.cpp14
-rw-r--r--src/tuning/kernels/copy_pad.cpp14
-rw-r--r--src/tuning/kernels/transpose_fast.cpp14
-rw-r--r--src/tuning/kernels/transpose_pad.cpp14
-rw-r--r--src/tuning/kernels/xaxpy.cpp14
-rw-r--r--src/tuning/kernels/xdot.cpp14
-rw-r--r--src/tuning/kernels/xgemm.cpp106
-rw-r--r--src/tuning/kernels/xgemm_direct.cpp64
-rw-r--r--src/tuning/kernels/xgemv.cpp14
-rw-r--r--src/tuning/kernels/xger.cpp14
-rw-r--r--src/tuning/tuning.hpp60
-rw-r--r--src/utilities/utilities.hpp59
12 files changed, 267 insertions, 134 deletions
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp
index d3d12bed..5d70c219 100644
--- a/src/tuning/kernels/copy_fast.cpp
+++ b/src/tuning/kernels/copy_fast.cpp
@@ -49,7 +49,14 @@ class TuneCopy {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full Search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -99,6 +106,11 @@ class TuneCopy {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp
index 0e157364..c487aaf5 100644
--- a/src/tuning/kernels/copy_pad.cpp
+++ b/src/tuning/kernels/copy_pad.cpp
@@ -49,7 +49,14 @@ class TunePad {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full Search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -107,6 +114,11 @@ class TunePad {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp
index a1ce4220..e169c831 100644
--- a/src/tuning/kernels/transpose_fast.cpp
+++ b/src/tuning/kernels/transpose_fast.cpp
@@ -49,7 +49,14 @@ class TuneTranspose {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -104,6 +111,11 @@ class TuneTranspose {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp
index 490580b5..a1695c9f 100644
--- a/src/tuning/kernels/transpose_pad.cpp
+++ b/src/tuning/kernels/transpose_pad.cpp
@@ -49,7 +49,14 @@ class TunePadTranspose {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; }// N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -111,6 +118,11 @@ class TunePadTranspose {
return 2 * args.m * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp
index a13e54f2..a8a9457d 100644
--- a/src/tuning/kernels/xaxpy.cpp
+++ b/src/tuning/kernels/xaxpy.cpp
@@ -53,7 +53,14 @@ class TuneXaxpy {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full search
+ static double DefaultMaxTempAnn(){ return 1.0;} // N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -102,6 +109,11 @@ class TuneXaxpy {
return 3 * args.n * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp
index b85c8521..f70fafb3 100644
--- a/src/tuning/kernels/xdot.cpp
+++ b/src/tuning/kernels/xdot.cpp
@@ -49,7 +49,14 @@ class TuneXdot {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -108,6 +115,11 @@ class TuneXdot {
return (V==1) ? (2*args.n + 1) * GetBytes(args.precision) : 1 * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return (V==1) ? "GB/s" : "N/A"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp
index 6ab64af4..e2032256 100644
--- a/src/tuning/kernels/xgemm.cpp
+++ b/src/tuning/kernels/xgemm.cpp
@@ -18,9 +18,7 @@
#include "utilities/utilities.hpp"
#include "tuning/tuning.hpp"
-#define XGEMM_EXEC
-bool tStrategyFlag=true;
-#define DEFAULT_NUM_OF_STRATEGY 1
+
namespace clblast {
// =================================================================================================
@@ -45,7 +43,9 @@ class TuneXgemm {
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,tStrategy, psoSwarmSize, psoInfG, psoInfL, psoInfR};
+ return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
+ kArgHeuristicSelection, kArgMultiSearchStrategy, kArgPsoSwarmSize,
+ kArgPsoInfGlobal, kArgPsoInfLocal, kArgPsoInfRandom};
}
// Tests for valid arguments
@@ -58,7 +58,14 @@ class TuneXgemm {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 512.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 2; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 2; } // Full search and Random/PSO
+ static size_t DefaultSwarmSizePSO() { return 8; }
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }
+ static size_t DefaultHeuristic(){ return size_t{3};} // PSO
+ static double DefaultMaxTempAnn(){ return 1.0;}
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -173,6 +180,29 @@ class TuneXgemm {
return 2 * args.m * args.n * args.k;
}
static std::string PerformanceUnit() { return "GFLOPS"; }
+
+ // Picks the search heuristic for this tuner variation; 0 stands for a full search.
+ static size_t GetCurrentHeuristic(const Arguments<T> &args){
+ const auto full_search = size_t{0};
+
+ // With the multi-search strategy enabled, variation 1 always performs the full search and
+ // every other variation runs the heuristic selected through the arguments.
+ if (args.multi_search_strategy) {
+ return (V == 1) ? full_search : args.heuristic_selection;
+ }
+
+ // Otherwise a fraction of exactly 1.0 or 0.0 requests exploring all parameter combinations;
+ // any other fraction (set as a command-line argument) defers to the selected heuristic, which
+ // then searches only a part of the parameter values.
+ const auto explore_everything = (args.fraction == 1.0 || args.fraction == 0.0);
+ return explore_everything ? full_search : args.heuristic_selection;
+ }
};
// =================================================================================================
@@ -196,49 +226,39 @@ void StartVariation(int argc, char *argv[]) {
}
}
+// Runs the tuner variation(s) requested on the command line. If kArgMultiSearchStrategy is
+// enabled, both variation 1 and variation 2 are run. Otherwise the heuristic given through
+// kArgHeuristicSelection picks a single variation: values 1, 2 and 3 run variation 2, and any
+// other value (including the default of 0) runs variation 1.
+void TestHeuristic(int argc, char *argv[]){
-// Main function (not within the clblast namespace)
-int main(int argc, char *argv[]) {
-
-int num_of_strategies = DEFAULT_NUM_OF_STRATEGY;
-
-if(const char* env_p = std::getenv("CK_TUNER_NUM_OF_STRATEGIES"))
-{
- num_of_strategies = atoi(env_p);
- printf("CK_TUNER_NUM_OF_STRATEGIES is: %s\n", env_p );
-}
-else
-{
- printf("CK_TUNER_NUM_OF_STRATEGIES is not defined\n");
-}
-
-printf("num_of_strategies : %d\n",num_of_strategies );
+ auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
+ auto help = std::string{""};
+ // The heuristic must come from its own option: reading kArgMultiSearchStrategy here as well
+ // would make both values identical and leave cases 1-3 of the switch below unreachable.
+ auto heuristic_selected = clblast::GetArgument(command_line_args, help, clblast::kArgHeuristicSelection, 0);
+ auto multi_search_strategy = clblast::GetArgument(command_line_args, help, clblast::kArgMultiSearchStrategy, 0);
-if( DEFAULT_NUM_OF_STRATEGY != num_of_strategies )
-{
- //FACCIO PRIMA LA FULL SEARCH
- StartVariation<1>(argc, argv);
+ if(multi_search_strategy){
+ StartVariation<1>(argc, argv);
+ StartVariation<2>(argc, argv);
+ }
+ else
+ {
+ switch(heuristic_selected){
+ case 1:
+ case 2:
+ case 3:
+ StartVariation<2>(argc, argv);
+ break;
+ case 0:
+ default:
+ StartVariation<1>(argc, argv);
+ break;
+ }
+ }
}
- auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
- auto help = std::string{"* Options given/available:\n"};
- auto localtStrategy = clblast::GetArgument(command_line_args, help, clblast::tStrategy, DEFAULT_STRATEGY);
- switch(localtStrategy)
- {
- case FULL_SEARCH_STRATEGY:
- StartVariation<1>(argc, argv);
- break;
- case RANDOM_SEARCH_STRATEGY:
- StartVariation<2>(argc, argv);
- break;
- case PSO_STRATEGY:
- StartVariation<3>(argc, argv);
- break;
- case DVDT_STRATEGY:
- StartVariation<2>(argc, argv);
- break;
- }
+// Main function (not within the clblast namespace): forwards the command-line arguments to
+// TestHeuristic and always returns 0.
+int main(int argc, char *argv[]) {
+ TestHeuristic(argc, argv);
return 0;
}
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index e7a35b93..3fb4ff34 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -41,7 +41,9 @@ class TuneXgemmDirect {
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
- return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction};
+ return {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
+ kArgHeuristicSelection, kArgMultiSearchStrategy, kArgPsoSwarmSize,
+ kArgPsoInfGlobal, kArgPsoInfLocal, kArgPsoInfRandom};
}
// Tests for valid arguments
@@ -54,7 +56,14 @@ class TuneXgemmDirect {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return (V==1) ? 1.0 : 32.0; } // test all or sample randomly
static size_t DefaultNumRuns() { return 4; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 2; } // Full search and Random/PSO
+ static size_t DefaultSwarmSizePSO() { return 8; }
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }
+ static size_t DefaultHeuristic(){ return size_t{3};} // PSO
+ static double DefaultMaxTempAnn(){ return 1.0;}
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &) { return 1; } // N/A for this kernel
static size_t GetSizeY(const Arguments<T> &) { return 1; } // N/A for this kernel
@@ -166,6 +175,29 @@ class TuneXgemmDirect {
return 2 * args.m * args.n * args.k;
}
static std::string PerformanceUnit() { return "GFLOPS"; }
+
+ // Picks the search heuristic for this tuner variation; 0 stands for a full search.
+ static size_t GetCurrentHeuristic(const Arguments<T> &args){
+ const auto full_search = size_t{0};
+
+ // With the multi-search strategy enabled, variation 1 always performs the full search and
+ // every other variation runs the heuristic selected through the arguments.
+ if (args.multi_search_strategy) {
+ return (V == 1) ? full_search : args.heuristic_selection;
+ }
+
+ // Otherwise a fraction of exactly 1.0 or 0.0 requests exploring all parameter combinations;
+ // any other fraction (set as a command-line argument) defers to the selected heuristic, which
+ // then searches only a part of the parameter values.
+ const auto explore_everything = (args.fraction == 1.0 || args.fraction == 0.0);
+ return explore_everything ? full_search : args.heuristic_selection;
+ }
};
// =================================================================================================
@@ -188,6 +220,34 @@ void StartVariation(int argc, char *argv[]) {
case clblast::Precision::kComplexDouble: clblast::Tuner<clblast::TuneXgemmDirect<double2,V>, double2>(argc, argv); break;
}
}
+// Runs the tuner variation(s) requested on the command line. If kArgMultiSearchStrategy is
+// enabled, both variation 1 and variation 2 are run. Otherwise the heuristic given through
+// kArgHeuristicSelection picks a single variation: values 1, 2 and 3 run variation 2, and any
+// other value (including the default of 0) runs variation 1.
+void TestHeuristic(int argc, char *argv[]){
+
+ auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
+ auto help = std::string{""};
+ // The heuristic must come from its own option: reading kArgMultiSearchStrategy here as well
+ // would make both values identical and leave cases 1-3 of the switch below unreachable.
+ auto heuristic_selected = clblast::GetArgument(command_line_args, help, clblast::kArgHeuristicSelection, 0);
+ auto multi_search_strategy = clblast::GetArgument(command_line_args, help, clblast::kArgMultiSearchStrategy, 0);
+
+ if(multi_search_strategy){
+ StartVariation<1>(argc, argv);
+ StartVariation<2>(argc, argv);
+ }
+ else
+ {
+ switch(heuristic_selected){
+ case 1:
+ case 2:
+ case 3:
+ StartVariation<2>(argc, argv);
+ break;
+ case 0:
+ default:
+ StartVariation<1>(argc, argv);
+ break;
+ }
+ }
+}
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp
index 9e9a6fe1..7d23eeec 100644
--- a/src/tuning/kernels/xgemv.cpp
+++ b/src/tuning/kernels/xgemv.cpp
@@ -52,7 +52,14 @@ class TuneXgemv {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; }// N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};} // Full search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
static size_t GetSizeY(const Arguments<T> &args) { return args.m; }
@@ -148,6 +155,11 @@ class TuneXgemv {
return (args.m*args.n + 2*args.m + args.n) * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp
index c82a29b6..09c5ba3f 100644
--- a/src/tuning/kernels/xger.cpp
+++ b/src/tuning/kernels/xger.cpp
@@ -49,7 +49,14 @@ class TuneXger {
static size_t DefaultBatchCount() { return 1; } // N/A for this kernel
static double DefaultFraction() { return 1.0; } // N/A for this kernel
static size_t DefaultNumRuns() { return 10; } // run every kernel this many times for averaging
-
+ static size_t DefaultNumSearchStragegy() { return 1; } // N/A for this kernel
+ static size_t DefaultSwarmSizePSO() { return 8; } // N/A for this kernel
+ static double DefaultInfluenceGlobalPSO(){ return 0.1; }// N/A for this kernel
+ static double DefaultInfluenceLocalPSO(){ return 0.3; } // N/A for this kernel
+ static double DefaultInfluenceRandomPSO(){ return 0.6; }// N/A for this kernel
+ static size_t DefaultHeuristic(){ return size_t{0};}// Full search
+ static double DefaultMaxTempAnn(){ return 1.0;}// N/A for this kernel
+
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) { return args.m; }
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
@@ -107,6 +114,11 @@ class TuneXger {
return (2*args.m*args.n + args.m + args.n) * GetBytes(args.precision);
}
static std::string PerformanceUnit() { return "GB/s"; }
+
+ // Returns which search heuristic the tuner should run. This kernel always selects heuristic 0
+ // (full search); the arguments are not consulted, so the parameter is intentionally unnamed.
+ static size_t GetCurrentHeuristic(const Arguments<T> &) {
+ return size_t{0}; // Full search
+ }
};
// =================================================================================================
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 35b320cb..2e0eb5a1 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -48,11 +48,13 @@ void Tuner(int argc, char* argv[]) {
if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<T>()); }
if (o == kArgFraction) { args.fraction = GetArgument(command_line_args, help, kArgFraction, C::DefaultFraction()); }
if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, C::DefaultBatchCount()); }
- if (o == tStrategy) {args.tStrategy = GetArgument(command_line_args, help, tStrategy, DEFAULT_STRATEGY); }
- if (o == psoSwarmSize) {args.psoSwarmSize = GetArgument(command_line_args, help, psoSwarmSize, DEFAULT_PSO_SWARM); }
- if (o == psoInfG) {args.psoInfG = GetArgument(command_line_args, help, psoInfG, DEFAULT_PSO_G); }
- if (o == psoInfL) {args.psoInfL = GetArgument(command_line_args, help, psoInfL, DEFAULT_PSO_L); }
- if (o == psoInfR) {args.psoInfR = GetArgument(command_line_args, help, psoInfR, DEFAULT_PSO_R); }
+ if (o == kArgHeuristicSelection) {args.heuristic_selection = GetArgument(command_line_args, help, kArgHeuristicSelection, C::DefaultHeuristic()); }
+ if (o == kArgMultiSearchStrategy) {args.multi_search_strategy = GetArgument(command_line_args, help, kArgMultiSearchStrategy, 0);}
+ if (o == kArgPsoSwarmSize) {args.pso_swarm_size = GetArgument(command_line_args, help, kArgPsoSwarmSize , C::DefaultSwarmSizePSO()); }
+ if (o == kArgPsoInfGlobal) {args.pso_inf_global = GetArgument(command_line_args, help, kArgPsoInfGlobal, C::DefaultInfluenceGlobalPSO()); }
+ if (o == kArgPsoInfLocal) {args.pso_inf_local = GetArgument(command_line_args, help, kArgPsoInfLocal, C::DefaultInfluenceLocalPSO()); }
+ if (o == kArgPsoInfRandom) {args.pso_inf_random = GetArgument(command_line_args, help, kArgPsoInfRandom, C::DefaultInfluenceRandomPSO()); }
+ if (o == kArgAnnMaxTemp) {args.ann_max_temperature = GetArgument(command_line_args, help, kArgAnnMaxTemp, C::DefaultMaxTempAnn());}
}
const auto num_runs = GetArgument(command_line_args, help, kArgNumRuns, C::DefaultNumRuns());
@@ -96,48 +98,17 @@ void Tuner(int argc, char* argv[]) {
// Initializes the tuner for the chosen device
cltune::Tuner tuner(args.platform_id, args.device_id);
- // Use full-search to explore all parameter combinations or random-search to search only a part of
- // the parameter values. The fraction is set as a command-line argument.
- #ifdef XGEMM_EXEC
+ // Select the search method based on the cmd_line arguments
+ // If the tuner does not support the selected choice, Full Search will be returned.
+ auto method = C::GetCurrentHeuristic(args);
- if(tStrategyFlag)
- {
- auto localtStrategy = args.tStrategy;
-
- if (args.fraction == 1.0 || args.fraction == 0.0)
- {
- localtStrategy = FULL_SEARCH_STRATEGY;
- }
- switch (localtStrategy)
- {
- case FULL_SEARCH_STRATEGY:
- tuner.UseFullSearch();
- break;
-
- case RANDOM_SEARCH_STRATEGY:
- tuner.UseRandomSearch(1.0/args.fraction);
- break;
- case PSO_STRATEGY:
- tuner.UsePSO(1.0/args.fraction, args.psoSwarmSize, args.psoInfG, args.psoInfL, args.psoInfR);
- break;
- case DVDT_STRATEGY:
- default:
- tuner.UseFullSearch();
- }
+ if (method == 1) { tuner.UseRandomSearch(1.0/args.fraction); }
+ else if (method == 2) { tuner.UseAnnealing(args.fraction, args.ann_max_temperature); }
+ else if (method == 3) {
+ tuner.UsePSO(args.fraction, args.pso_swarm_size, args.pso_inf_global, args.pso_inf_local, args.pso_inf_random);
}
+ else { tuner.UseFullSearch(); }
- #else
-
- if (args.fraction == 1.0 || args.fraction == 0.0)
- {
- tuner.UseFullSearch();
- }
- else
- {
- tuner.UseRandomSearch(1.0/args.fraction);
- }
-
- #endif
// Set extra settings for specific defines. This mimics src/routine.cc.
auto defines = std::string{""};
if (isAMD && isGPU) {
@@ -201,7 +172,6 @@ void Tuner(int argc, char* argv[]) {
}
tuner.PrintJSON("clblast_"+C::KernelFamily()+"_"+precision_string+".json", metadata);
-
}
// =================================================================================================
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 54214c49..e7e95bbb 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -28,21 +28,6 @@
#include "utilities/clblast_exceptions.hpp"
#include "utilities/msvc.hpp"
-#define FULL_SEARCH_STRATEGY 0
-#define RANDOM_SEARCH_STRATEGY 1
-#define PSO_STRATEGY 2
-#define DVDT_STRATEGY 3
-
-#define DEFAULT_STRATEGY 0
-
-#define DEFAULT_PSO_SWARM 8
-#define DEFAULT_PSO_G 0.3
-#define DEFAULT_PSO_L 0.6
-#define DEFAULT_PSO_R 0.1
-
-#ifdef XGEMM_EXEC
-extern bool tStrategyFlag;
-#endif
namespace clblast {
// =================================================================================================
@@ -62,12 +47,6 @@ constexpr auto kUnknownError = -999;
// =================================================================================================
-constexpr auto tStrategy = "strategy";
-constexpr auto psoSwarmSize = "psoSwarmSize";
-constexpr auto psoInfG = "psoInfG";
-constexpr auto psoInfL = "psoInfL";
-constexpr auto psoInfR = "psoInfR";
-
// The routine-specific arguments in string form
constexpr auto kArgM = "m";
constexpr auto kArgN = "n";
@@ -101,6 +80,15 @@ constexpr auto kArgBatchCount = "batch_num";
// The tuner-specific arguments in string form
constexpr auto kArgFraction = "fraction";
+constexpr auto kArgHeuristicSelection = "heuristic";
+constexpr auto kArgMultiSearchStrategy = "multi_strategy";
+// PSO tuner-specific arguments in string form
+constexpr auto kArgPsoSwarmSize = "pso_swarm_size";
+constexpr auto kArgPsoInfGlobal = "pso_inf_global";
+constexpr auto kArgPsoInfLocal = "pso_inf_local";
+constexpr auto kArgPsoInfRandom = "pso_inf_random";
+// Annealing tuner-specific arguments in string form
+constexpr auto kArgAnnMaxTemp = "ann_max_temperature";
// The client-specific arguments in string form
constexpr auto kArgCompareclblas = "clblas";
@@ -193,13 +181,13 @@ struct Arguments {
T beta = ConstantOne<T>();
// Batch-specific arguments
size_t batch_count = 1;
- std::vector<size_t> x_offsets = {0};
- std::vector<size_t> y_offsets = {0};
- std::vector<size_t> a_offsets = {0};
- std::vector<size_t> b_offsets = {0};
- std::vector<size_t> c_offsets = {0};
- std::vector<T> alphas = {ConstantOne<T>()};
- std::vector<T> betas = {ConstantOne<T>()};
+ std::vector<size_t> x_offsets; // = {0};
+ std::vector<size_t> y_offsets; // = {0};
+ std::vector<size_t> a_offsets; // = {0};
+ std::vector<size_t> b_offsets; // = {0};
+ std::vector<size_t> c_offsets; // = {0};
+ std::vector<T> alphas; // = {ConstantOne<T>()};
+ std::vector<T> betas; // = {ConstantOne<T>()};
// Sizes
size_t x_size = 1;
size_t y_size = 1;
@@ -209,7 +197,14 @@ struct Arguments {
size_t ap_size = 1;
size_t scalar_size = 1;
// Tuner-specific arguments
+ size_t heuristic_selection = 0;
+ size_t multi_search_strategy = 0;
double fraction = 1.0;
+ size_t pso_swarm_size = 8;
+ double pso_inf_global = 0.3;
+ double pso_inf_local = 0.6;
+ double pso_inf_random = 0.1;
+ double ann_max_temperature = 1.0; // Is it a valid default value?
// Client-specific arguments
int compare_clblas = 1;
int compare_cblas = 1;
@@ -227,16 +222,8 @@ struct Arguments {
bool print_help = false;
bool silent = false;
bool no_abbrv = false;
-
- int tStrategy = DEFAULT_STRATEGY;
- size_t psoSwarmSize = DEFAULT_PSO_SWARM;
- double psoInfG = DEFAULT_PSO_G;
- double psoInfL = DEFAULT_PSO_L;
- double psoInfR = DEFAULT_PSO_R;
};
-
-
// Structure containing all possible buffers for test clients
template <typename T>
struct Buffers {