summary | refs | log | tree | commit | diff
path: root/src/tuning
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-10 14:35:11 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-10 14:35:11 +0100
commit: 3d2ef9331bd01bfc31d6725576c8ccb4eadbbc1c (patch)
tree: 8835c9dc1f02c58e8a9485a94d27fd2af123dc82 /src/tuning
parent: 0bdc51e47c97ef8c810d89cc8b2307bea4cbe852 (diff)
Fixed a few things for the new tuning API
Diffstat (limited to 'src/tuning')
-rw-r--r-- src/tuning/kernels/copy_fast.cpp 10
-rw-r--r-- src/tuning/kernels/copy_fast.hpp 10
-rw-r--r-- src/tuning/kernels/copy_pad.cpp 10
-rw-r--r-- src/tuning/kernels/copy_pad.hpp 10
-rw-r--r-- src/tuning/kernels/invert.cpp 10
-rw-r--r-- src/tuning/kernels/invert.hpp 10
-rw-r--r-- src/tuning/kernels/transpose_fast.cpp 10
-rw-r--r-- src/tuning/kernels/transpose_fast.hpp 10
-rw-r--r-- src/tuning/kernels/transpose_pad.cpp 10
-rw-r--r-- src/tuning/kernels/transpose_pad.hpp 10
-rw-r--r-- src/tuning/kernels/xaxpy.cpp 10
-rw-r--r-- src/tuning/kernels/xaxpy.hpp 10
-rw-r--r-- src/tuning/kernels/xdot.cpp 10
-rw-r--r-- src/tuning/kernels/xdot.hpp 10
-rw-r--r-- src/tuning/kernels/xgemm.cpp 10
-rw-r--r-- src/tuning/kernels/xgemm.hpp 10
-rw-r--r-- src/tuning/kernels/xgemm_direct.cpp 10
-rw-r--r-- src/tuning/kernels/xgemm_direct.hpp 10
-rw-r--r-- src/tuning/kernels/xgemv.cpp 10
-rw-r--r-- src/tuning/kernels/xgemv.hpp 10
-rw-r--r-- src/tuning/kernels/xger.cpp 10
-rw-r--r-- src/tuning/kernels/xger.hpp 10
-rw-r--r-- src/tuning/tuning_api.cpp 67
23 files changed, 148 insertions, 139 deletions
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp
index a71dd083..0314113c 100644
--- a/src/tuning/kernels/copy_fast.cpp
+++ b/src/tuning/kernels/copy_fast.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<half>, clblast::CopyTestValidArguments<half>, clblast::CopySetConstraints, clblast::CopySetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<float>, clblast::CopyTestValidArguments<float>, clblast::CopySetConstraints, clblast::CopySetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<double>, clblast::CopyTestValidArguments<double>, clblast::CopySetConstraints, clblast::CopySetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<float2>, clblast::CopyTestValidArguments<float2>, clblast::CopySetConstraints, clblast::CopySetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<double2>, clblast::CopyTestValidArguments<double2>, clblast::CopySetConstraints, clblast::CopySetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/copy_fast.hpp b/src/tuning/kernels/copy_fast.hpp
index eab1c7dd..f9a58bc7 100644
--- a/src/tuning/kernels/copy_fast.hpp
+++ b/src/tuning/kernels/copy_fast.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults CopyGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha};
settings.default_m = 1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings CopyGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void CopyTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> CopySetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void CopySetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, buffers[2]()); // 2 == A matrix
kernel.SetArgument(2, buffers[3]()); // 3 == B matrix
diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp
index e32a5746..909a71c8 100644
--- a/src/tuning/kernels/copy_pad.cpp
+++ b/src/tuning/kernels/copy_pad.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<half>, clblast::PadTestValidArguments<half>, clblast::PadSetConstraints, clblast::PadSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<float>, clblast::PadTestValidArguments<float>, clblast::PadSetConstraints, clblast::PadSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<double>, clblast::PadTestValidArguments<double>, clblast::PadSetConstraints, clblast::PadSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<float2>, clblast::PadTestValidArguments<float2>, clblast::PadSetConstraints, clblast::PadSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<double2>, clblast::PadTestValidArguments<double2>, clblast::PadSetConstraints, clblast::PadSetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/copy_pad.hpp b/src/tuning/kernels/copy_pad.hpp
index b39c0318..e612ca9e 100644
--- a/src/tuning/kernels/copy_pad.hpp
+++ b/src/tuning/kernels/copy_pad.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults PadGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha};
settings.default_m = 1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings PadGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void PadTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> PadSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void PadSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
kernel.SetArgument(2, static_cast<int>(args.m));
diff --git a/src/tuning/kernels/invert.cpp b/src/tuning/kernels/invert.cpp
index ecd51199..3dfeb508 100644
--- a/src/tuning/kernels/invert.cpp
+++ b/src/tuning/kernels/invert.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<half>, clblast::InvertTestValidArguments<half>, clblast::InvertSetConstraints, clblast::InvertSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<float>, clblast::InvertTestValidArguments<float>, clblast::InvertSetConstraints, clblast::InvertSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<double>, clblast::InvertTestValidArguments<double>, clblast::InvertSetConstraints, clblast::InvertSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<float2>, clblast::InvertTestValidArguments<float2>, clblast::InvertSetConstraints, clblast::InvertSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<double2>, clblast::InvertTestValidArguments<double2>, clblast::InvertSetConstraints, clblast::InvertSetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/invert.hpp b/src/tuning/kernels/invert.hpp
index 0178a2aa..0a0c9ce2 100644
--- a/src/tuning/kernels/invert.hpp
+++ b/src/tuning/kernels/invert.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults InvertGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgN, kArgM, kArgK};
settings.default_n = 128; // dimension of input matrix 'n'
@@ -32,7 +32,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings InvertGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -81,16 +81,16 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &args) {
+void InvertTestValidArguments(const int, const Arguments<T> &args) {
if (!(args.k == 16)) {
throw std::runtime_error("'TripleMatMul16Part1Lower' requires 'k' to be 16");
}
}
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+std::vector<Constraint> InvertSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void InvertSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
const auto num_pages = CeilDiv(args.n, args.k * 2); // CeilDiv(n, current_size*2)
kernel.SetArgument(0, static_cast<int>(args.n)); // n
kernel.SetArgument(1, buffers[2]()); // 2 == A matrix
diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp
index 2144f23f..6b37a31d 100644
--- a/src/tuning/kernels/transpose_fast.cpp
+++ b/src/tuning/kernels/transpose_fast.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<half>, clblast::TransposeTestValidArguments<half>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<float>, clblast::TransposeTestValidArguments<float>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<double>, clblast::TransposeTestValidArguments<double>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<float2>, clblast::TransposeTestValidArguments<float2>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<double2>, clblast::TransposeTestValidArguments<double2>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/transpose_fast.hpp b/src/tuning/kernels/transpose_fast.hpp
index 47859f7a..e8917ad2 100644
--- a/src/tuning/kernels/transpose_fast.hpp
+++ b/src/tuning/kernels/transpose_fast.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults TransposeGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha};
settings.default_m = 1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings TransposeGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void TransposeTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> TransposeSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void TransposeSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, buffers[2]()); // 2 == A matrix
kernel.SetArgument(2, buffers[3]()); // 3 == B matrix
diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp
index ce39d857..fc7244f6 100644
--- a/src/tuning/kernels/transpose_pad.cpp
+++ b/src/tuning/kernels/transpose_pad.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<half>, clblast::PadtransposeTestValidArguments<half>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<float>, clblast::PadtransposeTestValidArguments<float>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<double>, clblast::PadtransposeTestValidArguments<double>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<float2>, clblast::PadtransposeTestValidArguments<float2>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<double2>, clblast::PadtransposeTestValidArguments<double2>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/transpose_pad.hpp b/src/tuning/kernels/transpose_pad.hpp
index c9ce983e..8d24a0dc 100644
--- a/src/tuning/kernels/transpose_pad.hpp
+++ b/src/tuning/kernels/transpose_pad.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults PadtransposeGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha};
settings.default_m = 1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings PadtransposeGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -76,12 +76,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void PadtransposeTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> PadtransposeSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void PadtransposeSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
kernel.SetArgument(2, static_cast<int>(args.m));
diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp
index a290b6c1..6a95600d 100644
--- a/src/tuning/kernels/xaxpy.cpp
+++ b/src/tuning/kernels/xaxpy.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<half>, clblast::XaxpyTestValidArguments<half>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<float>, clblast::XaxpyTestValidArguments<float>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<double>, clblast::XaxpyTestValidArguments<double>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<float2>, clblast::XaxpyTestValidArguments<float2>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<double2>, clblast::XaxpyTestValidArguments<double2>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/xaxpy.hpp b/src/tuning/kernels/xaxpy.hpp
index 266a671b..24550ed9 100644
--- a/src/tuning/kernels/xaxpy.hpp
+++ b/src/tuning/kernels/xaxpy.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults XaxpyGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgN, kArgAlpha};
settings.default_n = 4096*1024;
@@ -30,7 +30,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings XaxpyGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -75,16 +75,16 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &args) {
+void XaxpyTestValidArguments(const int, const Arguments<T> &args) {
if (!IsMultiple(args.n, 64)) {
throw std::runtime_error("'XaxpyFastest' requires 'n' to be a multiple of WGS*WPT*VW");
}
}
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+std::vector<Constraint> XaxpySetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XaxpySetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.n));
kernel.SetArgument(1, GetRealArg(args.alpha));
kernel.SetArgument(2, buffers[0]()); // 0 == X vector
diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp
index 10126392..6d10c4d8 100644
--- a/src/tuning/kernels/xdot.cpp
+++ b/src/tuning/kernels/xdot.cpp
@@ -24,11 +24,11 @@ template <int V>
void StartVariation(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<half>, clblast::XdotTestValidArguments<half>, clblast::XdotSetConstraints, clblast::XdotSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<float>, clblast::XdotTestValidArguments<float>, clblast::XdotSetConstraints, clblast::XdotSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<double>, clblast::XdotTestValidArguments<double>, clblast::XdotSetConstraints, clblast::XdotSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<float2>, clblast::XdotTestValidArguments<float2>, clblast::XdotSetConstraints, clblast::XdotSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<double2>, clblast::XdotTestValidArguments<double2>, clblast::XdotSetConstraints, clblast::XdotSetArguments<double2>); break;
}
}
diff --git a/src/tuning/kernels/xdot.hpp b/src/tuning/kernels/xdot.hpp
index 456baea4..15673c79 100644
--- a/src/tuning/kernels/xdot.hpp
+++ b/src/tuning/kernels/xdot.hpp
@@ -22,7 +22,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults XdotGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgN};
settings.default_n = 2*1024*1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
+TunerSettings XdotGetTunerSettings(const int V, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -74,12 +74,12 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void XdotTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> XdotSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XdotSetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
if (V == 1) {
kernel.SetArgument(0, static_cast<int>(args.n));
kernel.SetArgument(1, buffers[0]()); // 0 == X vector
diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp
index f388c76c..d365ce6d 100644
--- a/src/tuning/kernels/xgemm.cpp
+++ b/src/tuning/kernels/xgemm.cpp
@@ -23,11 +23,11 @@ template <int V>
void StartVariation(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<half>, clblast::XgemmTestValidArguments<half>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<float>, clblast::XgemmTestValidArguments<float>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<double>, clblast::XgemmTestValidArguments<double>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<float2>, clblast::XgemmTestValidArguments<float2>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<double2>, clblast::XgemmTestValidArguments<double2>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<double2>); break;
}
}
diff --git a/src/tuning/kernels/xgemm.hpp b/src/tuning/kernels/xgemm.hpp
index 09ebe91d..66e197e1 100644
--- a/src/tuning/kernels/xgemm.hpp
+++ b/src/tuning/kernels/xgemm.hpp
@@ -23,7 +23,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int V) {
+TunerDefaults XgemmGetTunerDefaults(const int V) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
kArgHeuristicSelection, kArgPsoSwarmSize,
@@ -38,7 +38,7 @@ TunerDefaults GetTunerDefaults(const int V) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
+TunerSettings XgemmGetTunerSettings(const int V, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -118,8 +118,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int V) {
+void XgemmTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> XgemmSetConstraints(const int V) {
auto constraints = std::vector<Constraint>();
auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); };
auto MultipleOfXMulY = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]*v[2]); };
@@ -148,7 +148,7 @@ std::vector<Constraint> SetConstraints(const int V) {
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XgemmSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
kernel.SetArgument(2, static_cast<int>(args.k));
diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp
index b059ad62..7298a6c3 100644
--- a/src/tuning/kernels/xgemm_direct.cpp
+++ b/src/tuning/kernels/xgemm_direct.cpp
@@ -23,11 +23,11 @@ template <int V>
void StartVariation(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<half>, clblast::XgemmDirectTestValidArguments<half>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<float>, clblast::XgemmDirectTestValidArguments<float>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<double>, clblast::XgemmDirectTestValidArguments<double>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<float2>, clblast::XgemmDirectTestValidArguments<float2>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<double2>, clblast::XgemmDirectTestValidArguments<double2>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<double2>); break;
}
}
diff --git a/src/tuning/kernels/xgemm_direct.hpp b/src/tuning/kernels/xgemm_direct.hpp
index 7cc9f654..ecb10bc6 100644
--- a/src/tuning/kernels/xgemm_direct.hpp
+++ b/src/tuning/kernels/xgemm_direct.hpp
@@ -23,7 +23,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int V) {
+TunerDefaults XgemmDirectGetTunerDefaults(const int V) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction,
kArgHeuristicSelection, kArgPsoSwarmSize,
@@ -38,7 +38,7 @@ TunerDefaults GetTunerDefaults(const int V) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
+TunerSettings XgemmDirectGetTunerSettings(const int V, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -109,8 +109,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int V) {
+void XgemmDirectTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> XgemmDirectSetConstraints(const int V) {
auto constraints = std::vector<Constraint>();
auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); };
auto MultipleOfXMulY = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]*v[2]); };
@@ -138,7 +138,7 @@ std::vector<Constraint> SetConstraints(const int V) {
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XgemmDirectSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
kernel.SetArgument(2, static_cast<int>(args.k));
diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp
index 165c5628..9e45d73f 100644
--- a/src/tuning/kernels/xgemv.cpp
+++ b/src/tuning/kernels/xgemv.cpp
@@ -23,11 +23,11 @@ template <int V>
void StartVariation(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<half>, clblast::XgemvTestValidArguments<half>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<float>, clblast::XgemvTestValidArguments<float>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<double>, clblast::XgemvTestValidArguments<double>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<float2>, clblast::XgemvTestValidArguments<float2>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<double2>, clblast::XgemvTestValidArguments<double2>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<double2>); break;
}
}
diff --git a/src/tuning/kernels/xgemv.hpp b/src/tuning/kernels/xgemv.hpp
index 87186195..e44efe32 100644
--- a/src/tuning/kernels/xgemv.hpp
+++ b/src/tuning/kernels/xgemv.hpp
@@ -24,7 +24,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults XgemvGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha, kArgBeta};
settings.default_m = 2048;
@@ -35,7 +35,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
+TunerSettings XgemvGetTunerSettings(const int V, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -96,8 +96,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int V) {
+void XgemvTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> XgemvSetConstraints(const int V) {
auto constraints = std::vector<Constraint>();
if (V==2 || V==3) {
auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); };
@@ -112,7 +112,7 @@ std::vector<Constraint> SetConstraints(const int V) {
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XgemvSetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
auto a_rotated = (V==3) ? 1 : 0;
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp
index 8a90f340..6dfc9ffa 100644
--- a/src/tuning/kernels/xger.cpp
+++ b/src/tuning/kernels/xger.cpp
@@ -22,11 +22,11 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
switch(clblast::GetPrecision(command_line_args)) {
- case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break;
- case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break;
- case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break;
- case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break;
- case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break;
+ case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<half>, clblast::XgerTestValidArguments<half>, clblast::XgerSetConstraints, clblast::XgerSetArguments<half>); break;
+ case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<float>, clblast::XgerTestValidArguments<float>, clblast::XgerSetConstraints, clblast::XgerSetArguments<float>); break;
+ case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<double>, clblast::XgerTestValidArguments<double>, clblast::XgerSetConstraints, clblast::XgerSetArguments<double>); break;
+ case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<float2>, clblast::XgerTestValidArguments<float2>, clblast::XgerSetConstraints, clblast::XgerSetArguments<float2>); break;
+ case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<double2>, clblast::XgerTestValidArguments<double2>, clblast::XgerSetConstraints, clblast::XgerSetArguments<double2>); break;
}
return 0;
}
diff --git a/src/tuning/kernels/xger.hpp b/src/tuning/kernels/xger.hpp
index 01d85c9e..afd2f36e 100644
--- a/src/tuning/kernels/xger.hpp
+++ b/src/tuning/kernels/xger.hpp
@@ -21,7 +21,7 @@ namespace clblast {
// =================================================================================================
// Settings for this kernel (default command-line arguments)
-TunerDefaults GetTunerDefaults(const int) {
+TunerDefaults XgerGetTunerDefaults(const int) {
auto settings = TunerDefaults();
settings.options = {kArgM, kArgN, kArgAlpha};
settings.default_m = 1024;
@@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) {
// Settings for this kernel (general)
template <typename T>
-TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
+TunerSettings XgerGetTunerSettings(const int, const Arguments<T> &args) {
auto settings = TunerSettings();
// Identification of the kernel
@@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) {
// Tests for valid arguments
template <typename T>
-void TestValidArguments(const int, const Arguments<T> &) { }
-std::vector<Constraint> SetConstraints(const int) { return {}; }
+void XgerTestValidArguments(const int, const Arguments<T> &) { }
+std::vector<Constraint> XgerSetConstraints(const int) { return {}; }
// Sets the kernel's arguments
template <typename T>
-void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
+void XgerSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) {
kernel.SetArgument(0, static_cast<int>(args.m));
kernel.SetArgument(1, static_cast<int>(args.n));
kernel.SetArgument(2, GetRealArg(args.alpha));
diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp
index 4ffb46c2..5900378a 100644
--- a/src/tuning/tuning_api.cpp
+++ b/src/tuning/tuning_api.cpp
@@ -17,10 +17,19 @@
#include <random>
#include <utility>
#include <algorithm>
-#include <cstdio>
#include "tuning/tuning.hpp"
+#include "tuning/kernels/xaxpy.hpp"
+#include "tuning/kernels/xdot.hpp"
+#include "tuning/kernels/xgemv.hpp"
+#include "tuning/kernels/xger.hpp"
+#include "tuning/kernels/xgemm.hpp"
+#include "tuning/kernels/xgemm_direct.hpp"
#include "tuning/kernels/copy_fast.hpp"
+#include "tuning/kernels/copy_pad.hpp"
+#include "tuning/kernels/transpose_fast.hpp"
+#include "tuning/kernels/transpose_pad.hpp"
+#include "tuning/kernels/invert.hpp"
namespace clblast {
// =================================================================================================
@@ -30,8 +39,8 @@ StatusCode TuneXaxpy(RawCommandQueue * queue, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, XaxpyGetTunerDefaults, XaxpyGetTunerSettings<T>,
+ XaxpyTestValidArguments<T>, XaxpySetConstraints, XaxpySetArguments<T>, parameters);
}
template StatusCode TuneXaxpy<half>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXaxpy<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -44,11 +53,11 @@ StatusCode TuneXdot(RawCommandQueue * queue, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.n = n;
auto queue_cpp = Queue(*queue);
- auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ auto status = TunerAPI<T>(queue_cpp, args, 1, XdotGetTunerDefaults, XdotGetTunerSettings<T>,
+ XdotTestValidArguments<T>, XdotSetConstraints, XdotSetArguments<T>, parameters);
if (status != StatusCode::kSuccess) { return status; }
- return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 2, XdotGetTunerDefaults, XdotGetTunerSettings<T>,
+ XdotTestValidArguments<T>, XdotSetConstraints, XdotSetArguments<T>, parameters);
}
template StatusCode TuneXdot<half>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXdot<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -61,14 +70,14 @@ StatusCode TuneXgemv(RawCommandQueue * queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ auto status = TunerAPI<T>(queue_cpp, args, 1, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>,
+ XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters);
if (status != StatusCode::kSuccess) { return status; }
- status = TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ status = TunerAPI<T>(queue_cpp, args, 2, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>,
+ XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters);
if (status != StatusCode::kSuccess) { return status; }
- return TunerAPI<T>(queue_cpp, args, 3, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 3, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>,
+ XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters);
}
template StatusCode TuneXgemv<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXgemv<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -81,8 +90,8 @@ StatusCode TuneXger(RawCommandQueue * queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, XgerGetTunerDefaults, XgerGetTunerSettings<T>,
+ XgerTestValidArguments<T>, XgerSetConstraints, XgerSetArguments<T>, parameters);
}
template StatusCode TuneXger<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXger<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -95,8 +104,8 @@ StatusCode TuneXgemm(RawCommandQueue * queue, const size_t m, const size_t n, co
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 2, XgemmGetTunerDefaults, XgemmGetTunerSettings<T>,
+ XgemmTestValidArguments<T>, XgemmSetConstraints, XgemmSetArguments<T>, parameters);
}
template StatusCode TuneXgemm<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXgemm<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -109,8 +118,8 @@ StatusCode TuneXgemmDirect(RawCommandQueue * queue, const size_t m, const size_t
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 2, XgemmDirectGetTunerDefaults, XgemmDirectGetTunerSettings<T>,
+ XgemmDirectTestValidArguments<T>, XgemmDirectSetConstraints, XgemmDirectSetArguments<T>, parameters);
}
template StatusCode TuneXgemmDirect<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneXgemmDirect<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -123,8 +132,8 @@ StatusCode TuneCopy(RawCommandQueue * queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, CopyGetTunerDefaults, CopyGetTunerSettings<T>,
+ CopyTestValidArguments<T>, CopySetConstraints, CopySetArguments<T>, parameters);
}
template StatusCode TuneCopy<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneCopy<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -137,8 +146,8 @@ StatusCode TunePad(RawCommandQueue * queue, const size_t m, const size_t n,
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, PadGetTunerDefaults, PadGetTunerSettings<T>,
+ PadTestValidArguments<T>, PadSetConstraints, PadSetArguments<T>, parameters);
}
template StatusCode TunePad<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TunePad<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -151,8 +160,8 @@ StatusCode TuneTranspose(RawCommandQueue * queue, const size_t m, const size_t n
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, TransposeGetTunerDefaults, TransposeGetTunerSettings<T>,
+ TransposeTestValidArguments<T>, TransposeSetConstraints, TransposeSetArguments<T>, parameters);
}
template StatusCode TuneTranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneTranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -165,8 +174,8 @@ StatusCode TunePadtranspose(RawCommandQueue * queue, const size_t m, const size_
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, PadtransposeGetTunerDefaults, PadtransposeGetTunerSettings<T>,
+ PadtransposeTestValidArguments<T>, PadtransposeSetConstraints, PadtransposeSetArguments<T>, parameters);
}
template StatusCode TunePadtranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TunePadtranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
@@ -179,8 +188,8 @@ StatusCode TuneInvert(RawCommandQueue * queue, const size_t m, const size_t n, c
const double fraction, std::unordered_map<std::string,size_t> &parameters) {
auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k;
auto queue_cpp = Queue(*queue);
- return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>,
- TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters);
+ return TunerAPI<T>(queue_cpp, args, 0, InvertGetTunerDefaults, InvertGetTunerSettings<T>,
+ InvertTestValidArguments<T>, InvertSetConstraints, InvertSetArguments<T>, parameters);
}
template StatusCode TuneInvert<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);
template StatusCode TuneInvert<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&);