diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 14:35:11 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 14:35:11 +0100 |
commit | 3d2ef9331bd01bfc31d6725576c8ccb4eadbbc1c (patch) | |
tree | 8835c9dc1f02c58e8a9485a94d27fd2af123dc82 /src/tuning | |
parent | 0bdc51e47c97ef8c810d89cc8b2307bea4cbe852 (diff) |
Fixed a few things for the new tuning API
Diffstat (limited to 'src/tuning')
-rw-r--r-- | src/tuning/kernels/copy_fast.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/copy_fast.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/copy_pad.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/copy_pad.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/invert.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/invert.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_fast.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_fast.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_pad.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/transpose_pad.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xaxpy.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xaxpy.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xdot.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xdot.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm_direct.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemm_direct.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemv.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xgemv.hpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xger.cpp | 10 | ||||
-rw-r--r-- | src/tuning/kernels/xger.hpp | 10 | ||||
-rw-r--r-- | src/tuning/tuning_api.cpp | 67 |
23 files changed, 148 insertions, 139 deletions
diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp index a71dd083..0314113c 100644 --- a/src/tuning/kernels/copy_fast.cpp +++ b/src/tuning/kernels/copy_fast.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<half>, clblast::CopyTestValidArguments<half>, clblast::CopySetConstraints, clblast::CopySetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<float>, 
clblast::CopyTestValidArguments<float>, clblast::CopySetConstraints, clblast::CopySetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<double>, clblast::CopyTestValidArguments<double>, clblast::CopySetConstraints, clblast::CopySetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<float2>, clblast::CopyTestValidArguments<float2>, clblast::CopySetConstraints, clblast::CopySetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::CopyGetTunerDefaults, clblast::CopyGetTunerSettings<double2>, clblast::CopyTestValidArguments<double2>, clblast::CopySetConstraints, clblast::CopySetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/copy_fast.hpp b/src/tuning/kernels/copy_fast.hpp index eab1c7dd..f9a58bc7 100644 --- a/src/tuning/kernels/copy_fast.hpp +++ b/src/tuning/kernels/copy_fast.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults CopyGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha}; settings.default_m = 1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings CopyGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void 
TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void CopyTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> CopySetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void CopySetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, buffers[2]()); // 2 == A matrix kernel.SetArgument(2, buffers[3]()); // 3 == B matrix diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp index e32a5746..909a71c8 100644 --- a/src/tuning/kernels/copy_pad.cpp +++ b/src/tuning/kernels/copy_pad.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, 
clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<half>, clblast::PadTestValidArguments<half>, clblast::PadSetConstraints, clblast::PadSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<float>, clblast::PadTestValidArguments<float>, clblast::PadSetConstraints, clblast::PadSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<double>, clblast::PadTestValidArguments<double>, clblast::PadSetConstraints, clblast::PadSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<float2>, clblast::PadTestValidArguments<float2>, clblast::PadSetConstraints, clblast::PadSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::PadGetTunerDefaults, clblast::PadGetTunerSettings<double2>, clblast::PadTestValidArguments<double2>, clblast::PadSetConstraints, clblast::PadSetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/copy_pad.hpp b/src/tuning/kernels/copy_pad.hpp index b39c0318..e612ca9e 100644 --- a/src/tuning/kernels/copy_pad.hpp +++ b/src/tuning/kernels/copy_pad.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { 
+TunerDefaults PadGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha}; settings.default_m = 1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings PadGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void PadTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> PadSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void PadSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); kernel.SetArgument(2, static_cast<int>(args.m)); diff --git a/src/tuning/kernels/invert.cpp b/src/tuning/kernels/invert.cpp index ecd51199..3dfeb508 100644 --- a/src/tuning/kernels/invert.cpp +++ b/src/tuning/kernels/invert.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, 
argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<half>, clblast::InvertTestValidArguments<half>, clblast::InvertSetConstraints, clblast::InvertSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<float>, clblast::InvertTestValidArguments<float>, clblast::InvertSetConstraints, clblast::InvertSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<double>, clblast::InvertTestValidArguments<double>, clblast::InvertSetConstraints, clblast::InvertSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<float2>, clblast::InvertTestValidArguments<float2>, clblast::InvertSetConstraints, clblast::InvertSetArguments<float2>); break; + case 
clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::InvertGetTunerDefaults, clblast::InvertGetTunerSettings<double2>, clblast::InvertTestValidArguments<double2>, clblast::InvertSetConstraints, clblast::InvertSetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/invert.hpp b/src/tuning/kernels/invert.hpp index 0178a2aa..0a0c9ce2 100644 --- a/src/tuning/kernels/invert.hpp +++ b/src/tuning/kernels/invert.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults InvertGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgN, kArgM, kArgK}; settings.default_n = 128; // dimension of input matrix 'n' @@ -32,7 +32,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings InvertGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -81,16 +81,16 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &args) { +void InvertTestValidArguments(const int, const Arguments<T> &args) { if (!(args.k == 16)) { throw std::runtime_error("'TripleMatMul16Part1Lower' requires 'k' to be 16"); } } -std::vector<Constraint> SetConstraints(const int) { return {}; } +std::vector<Constraint> InvertSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void InvertSetArguments(const int, Kernel &kernel, const Arguments<T> &args, 
std::vector<Buffer<T>>& buffers) { const auto num_pages = CeilDiv(args.n, args.k * 2); // CeilDiv(n, current_size*2) kernel.SetArgument(0, static_cast<int>(args.n)); // n kernel.SetArgument(1, buffers[2]()); // 2 == A matrix diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp index 2144f23f..6b37a31d 100644 --- a/src/tuning/kernels/transpose_fast.cpp +++ b/src/tuning/kernels/transpose_fast.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<half>, 
clblast::TransposeTestValidArguments<half>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<float>, clblast::TransposeTestValidArguments<float>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<double>, clblast::TransposeTestValidArguments<double>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<float2>, clblast::TransposeTestValidArguments<float2>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::TransposeGetTunerDefaults, clblast::TransposeGetTunerSettings<double2>, clblast::TransposeTestValidArguments<double2>, clblast::TransposeSetConstraints, clblast::TransposeSetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/transpose_fast.hpp b/src/tuning/kernels/transpose_fast.hpp index 47859f7a..e8917ad2 100644 --- a/src/tuning/kernels/transpose_fast.hpp +++ b/src/tuning/kernels/transpose_fast.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults TransposeGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha}; settings.default_m = 1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) 
template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings TransposeGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void TransposeTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> TransposeSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void TransposeSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, buffers[2]()); // 2 == A matrix kernel.SetArgument(2, buffers[3]()); // 3 == B matrix diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp index ce39d857..fc7244f6 100644 --- a/src/tuning/kernels/transpose_pad.cpp +++ b/src/tuning/kernels/transpose_pad.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case 
clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<half>, clblast::PadtransposeTestValidArguments<half>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<float>, clblast::PadtransposeTestValidArguments<float>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<double>, clblast::PadtransposeTestValidArguments<double>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<float2>, clblast::PadtransposeTestValidArguments<float2>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, 
clblast::PadtransposeGetTunerDefaults, clblast::PadtransposeGetTunerSettings<double2>, clblast::PadtransposeTestValidArguments<double2>, clblast::PadtransposeSetConstraints, clblast::PadtransposeSetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/transpose_pad.hpp b/src/tuning/kernels/transpose_pad.hpp index c9ce983e..8d24a0dc 100644 --- a/src/tuning/kernels/transpose_pad.hpp +++ b/src/tuning/kernels/transpose_pad.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults PadtransposeGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha}; settings.default_m = 1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings PadtransposeGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -76,12 +76,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void PadtransposeTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> PadtransposeSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void PadtransposeSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); 
kernel.SetArgument(2, static_cast<int>(args.m)); diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp index a290b6c1..6a95600d 100644 --- a/src/tuning/kernels/xaxpy.cpp +++ b/src/tuning/kernels/xaxpy.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<half>, clblast::XaxpyTestValidArguments<half>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, 
clblast::XaxpyGetTunerSettings<float>, clblast::XaxpyTestValidArguments<float>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<double>, clblast::XaxpyTestValidArguments<double>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<float2>, clblast::XaxpyTestValidArguments<float2>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::XaxpyGetTunerDefaults, clblast::XaxpyGetTunerSettings<double2>, clblast::XaxpyTestValidArguments<double2>, clblast::XaxpySetConstraints, clblast::XaxpySetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/xaxpy.hpp b/src/tuning/kernels/xaxpy.hpp index 266a671b..24550ed9 100644 --- a/src/tuning/kernels/xaxpy.hpp +++ b/src/tuning/kernels/xaxpy.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults XaxpyGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgN, kArgAlpha}; settings.default_n = 4096*1024; @@ -30,7 +30,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings XaxpyGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -75,16 +75,16 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments 
template <typename T> -void TestValidArguments(const int, const Arguments<T> &args) { +void XaxpyTestValidArguments(const int, const Arguments<T> &args) { if (!IsMultiple(args.n, 64)) { throw std::runtime_error("'XaxpyFastest' requires 'n' to be a multiple of WGS*WPT*VW"); } } -std::vector<Constraint> SetConstraints(const int) { return {}; } +std::vector<Constraint> XaxpySetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XaxpySetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.n)); kernel.SetArgument(1, GetRealArg(args.alpha)); kernel.SetArgument(2, buffers[0]()); // 0 == X vector diff --git a/src/tuning/kernels/xdot.cpp b/src/tuning/kernels/xdot.cpp index 10126392..6d10c4d8 100644 --- a/src/tuning/kernels/xdot.cpp +++ b/src/tuning/kernels/xdot.cpp @@ -24,11 +24,11 @@ template <int V> void StartVariation(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 
V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<half>, clblast::XdotTestValidArguments<half>, clblast::XdotSetConstraints, clblast::XdotSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<float>, clblast::XdotTestValidArguments<float>, clblast::XdotSetConstraints, clblast::XdotSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<double>, clblast::XdotTestValidArguments<double>, clblast::XdotSetConstraints, clblast::XdotSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<float2>, clblast::XdotTestValidArguments<float2>, clblast::XdotSetConstraints, clblast::XdotSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XdotGetTunerDefaults, clblast::XdotGetTunerSettings<double2>, clblast::XdotTestValidArguments<double2>, clblast::XdotSetConstraints, clblast::XdotSetArguments<double2>); break; } } diff --git a/src/tuning/kernels/xdot.hpp b/src/tuning/kernels/xdot.hpp index 456baea4..15673c79 100644 --- a/src/tuning/kernels/xdot.hpp +++ b/src/tuning/kernels/xdot.hpp @@ -22,7 +22,7 @@ namespace clblast { // 
================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults XdotGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgN}; settings.default_n = 2*1024*1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { +TunerSettings XdotGetTunerSettings(const int V, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -74,12 +74,12 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void XdotTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> XdotSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XdotSetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { if (V == 1) { kernel.SetArgument(0, static_cast<int>(args.n)); kernel.SetArgument(1, buffers[0]()); // 0 == X vector diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp index f388c76c..d365ce6d 100644 --- a/src/tuning/kernels/xgemm.cpp +++ b/src/tuning/kernels/xgemm.cpp @@ -23,11 +23,11 @@ template <int V> void StartVariation(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, 
clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<half>, clblast::XgemmTestValidArguments<half>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<float>, clblast::XgemmTestValidArguments<float>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<double>, clblast::XgemmTestValidArguments<double>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemmGetTunerDefaults, 
clblast::XgemmGetTunerSettings<float2>, clblast::XgemmTestValidArguments<float2>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemmGetTunerDefaults, clblast::XgemmGetTunerSettings<double2>, clblast::XgemmTestValidArguments<double2>, clblast::XgemmSetConstraints, clblast::XgemmSetArguments<double2>); break; } } diff --git a/src/tuning/kernels/xgemm.hpp b/src/tuning/kernels/xgemm.hpp index 09ebe91d..66e197e1 100644 --- a/src/tuning/kernels/xgemm.hpp +++ b/src/tuning/kernels/xgemm.hpp @@ -23,7 +23,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int V) { +TunerDefaults XgemmGetTunerDefaults(const int V) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction, kArgHeuristicSelection, kArgPsoSwarmSize, @@ -38,7 +38,7 @@ TunerDefaults GetTunerDefaults(const int V) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { +TunerSettings XgemmGetTunerSettings(const int V, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -118,8 +118,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int V) { +void XgemmTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> XgemmSetConstraints(const int V) { auto constraints = std::vector<Constraint>(); auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); }; auto MultipleOfXMulY = [] (std::vector<size_t> v) { return IsMultiple(v[0], 
v[1]*v[2]); }; @@ -148,7 +148,7 @@ std::vector<Constraint> SetConstraints(const int V) { // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XgemmSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); kernel.SetArgument(2, static_cast<int>(args.k)); diff --git a/src/tuning/kernels/xgemm_direct.cpp b/src/tuning/kernels/xgemm_direct.cpp index b059ad62..7298a6c3 100644 --- a/src/tuning/kernels/xgemm_direct.cpp +++ b/src/tuning/kernels/xgemm_direct.cpp @@ -23,11 +23,11 @@ template <int V> void StartVariation(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, 
clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<half>, clblast::XgemmDirectTestValidArguments<half>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<float>, clblast::XgemmDirectTestValidArguments<float>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<double>, clblast::XgemmDirectTestValidArguments<double>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<float2>, clblast::XgemmDirectTestValidArguments<float2>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemmDirectGetTunerDefaults, clblast::XgemmDirectGetTunerSettings<double2>, clblast::XgemmDirectTestValidArguments<double2>, clblast::XgemmDirectSetConstraints, clblast::XgemmDirectSetArguments<double2>); break; } } diff --git a/src/tuning/kernels/xgemm_direct.hpp b/src/tuning/kernels/xgemm_direct.hpp index 7cc9f654..ecb10bc6 100644 --- a/src/tuning/kernels/xgemm_direct.hpp +++ b/src/tuning/kernels/xgemm_direct.hpp @@ -23,7 +23,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line 
arguments) -TunerDefaults GetTunerDefaults(const int V) { +TunerDefaults XgemmDirectGetTunerDefaults(const int V) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgK, kArgAlpha, kArgBeta, kArgFraction, kArgHeuristicSelection, kArgPsoSwarmSize, @@ -38,7 +38,7 @@ TunerDefaults GetTunerDefaults(const int V) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { +TunerSettings XgemmDirectGetTunerSettings(const int V, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -109,8 +109,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int V) { +void XgemmDirectTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> XgemmDirectSetConstraints(const int V) { auto constraints = std::vector<Constraint>(); auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); }; auto MultipleOfXMulY = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]*v[2]); }; @@ -138,7 +138,7 @@ std::vector<Constraint> SetConstraints(const int V) { // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XgemmDirectSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); kernel.SetArgument(2, static_cast<int>(args.k)); diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp index 165c5628..9e45d73f 100644 --- a/src/tuning/kernels/xgemv.cpp +++ b/src/tuning/kernels/xgemv.cpp @@ -23,11 +23,11 @@ template <int V> void StartVariation(int argc, char *argv[]) { const auto 
command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<half>, clblast::XgemvTestValidArguments<half>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<float>, clblast::XgemvTestValidArguments<float>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<double>, 
clblast::XgemvTestValidArguments<double>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<float2>, clblast::XgemvTestValidArguments<float2>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, V, clblast::XgemvGetTunerDefaults, clblast::XgemvGetTunerSettings<double2>, clblast::XgemvTestValidArguments<double2>, clblast::XgemvSetConstraints, clblast::XgemvSetArguments<double2>); break; } } diff --git a/src/tuning/kernels/xgemv.hpp b/src/tuning/kernels/xgemv.hpp index 87186195..e44efe32 100644 --- a/src/tuning/kernels/xgemv.hpp +++ b/src/tuning/kernels/xgemv.hpp @@ -24,7 +24,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults XgemvGetTunerDefaults(const int) { auto settings = TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha, kArgBeta}; settings.default_m = 2048; @@ -35,7 +35,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { +TunerSettings XgemvGetTunerSettings(const int V, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -96,8 +96,8 @@ TunerSettings GetTunerSettings(const int V, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int V) { +void XgemvTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> XgemvSetConstraints(const int V) { auto constraints = 
std::vector<Constraint>(); if (V==2 || V==3) { auto MultipleOfX = [] (std::vector<size_t> v) { return IsMultiple(v[0], v[1]); }; @@ -112,7 +112,7 @@ std::vector<Constraint> SetConstraints(const int V) { // Sets the kernel's arguments template <typename T> -void SetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XgemvSetArguments(const int V, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { auto a_rotated = (V==3) ? 1 : 0; kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp index 8a90f340..6dfc9ffa 100644 --- a/src/tuning/kernels/xger.cpp +++ b/src/tuning/kernels/xger.cpp @@ -22,11 +22,11 @@ using double2 = clblast::double2; int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); switch(clblast::GetPrecision(command_line_args)) { - case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<half>, clblast::TestValidArguments<half>, clblast::SetConstraints, clblast::SetArguments<half>); break; - case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float>, clblast::TestValidArguments<float>, clblast::SetConstraints, clblast::SetArguments<float>); break; - case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double>, clblast::TestValidArguments<double>, clblast::SetConstraints, clblast::SetArguments<double>); break; - case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<float2>, clblast::TestValidArguments<float2>, clblast::SetConstraints, clblast::SetArguments<float2>); break; - case clblast::Precision::kComplexDouble: 
clblast::Tuner<double2>(argc, argv, 0, clblast::GetTunerDefaults, clblast::GetTunerSettings<double2>, clblast::TestValidArguments<double2>, clblast::SetConstraints, clblast::SetArguments<double2>); break; + case clblast::Precision::kHalf: clblast::Tuner<half>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<half>, clblast::XgerTestValidArguments<half>, clblast::XgerSetConstraints, clblast::XgerSetArguments<half>); break; + case clblast::Precision::kSingle: clblast::Tuner<float>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<float>, clblast::XgerTestValidArguments<float>, clblast::XgerSetConstraints, clblast::XgerSetArguments<float>); break; + case clblast::Precision::kDouble: clblast::Tuner<double>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<double>, clblast::XgerTestValidArguments<double>, clblast::XgerSetConstraints, clblast::XgerSetArguments<double>); break; + case clblast::Precision::kComplexSingle: clblast::Tuner<float2>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<float2>, clblast::XgerTestValidArguments<float2>, clblast::XgerSetConstraints, clblast::XgerSetArguments<float2>); break; + case clblast::Precision::kComplexDouble: clblast::Tuner<double2>(argc, argv, 0, clblast::XgerGetTunerDefaults, clblast::XgerGetTunerSettings<double2>, clblast::XgerTestValidArguments<double2>, clblast::XgerSetConstraints, clblast::XgerSetArguments<double2>); break; } return 0; } diff --git a/src/tuning/kernels/xger.hpp b/src/tuning/kernels/xger.hpp index 01d85c9e..afd2f36e 100644 --- a/src/tuning/kernels/xger.hpp +++ b/src/tuning/kernels/xger.hpp @@ -21,7 +21,7 @@ namespace clblast { // ================================================================================================= // Settings for this kernel (default command-line arguments) -TunerDefaults GetTunerDefaults(const int) { +TunerDefaults XgerGetTunerDefaults(const int) { auto settings = 
TunerDefaults(); settings.options = {kArgM, kArgN, kArgAlpha}; settings.default_m = 1024; @@ -31,7 +31,7 @@ TunerDefaults GetTunerDefaults(const int) { // Settings for this kernel (general) template <typename T> -TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { +TunerSettings XgerGetTunerSettings(const int, const Arguments<T> &args) { auto settings = TunerSettings(); // Identification of the kernel @@ -77,12 +77,12 @@ TunerSettings GetTunerSettings(const int, const Arguments<T> &args) { // Tests for valid arguments template <typename T> -void TestValidArguments(const int, const Arguments<T> &) { } -std::vector<Constraint> SetConstraints(const int) { return {}; } +void XgerTestValidArguments(const int, const Arguments<T> &) { } +std::vector<Constraint> XgerSetConstraints(const int) { return {}; } // Sets the kernel's arguments template <typename T> -void SetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { +void XgerSetArguments(const int, Kernel &kernel, const Arguments<T> &args, std::vector<Buffer<T>>& buffers) { kernel.SetArgument(0, static_cast<int>(args.m)); kernel.SetArgument(1, static_cast<int>(args.n)); kernel.SetArgument(2, GetRealArg(args.alpha)); diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp index 4ffb46c2..5900378a 100644 --- a/src/tuning/tuning_api.cpp +++ b/src/tuning/tuning_api.cpp @@ -17,10 +17,19 @@ #include <random> #include <utility> #include <algorithm> -#include <cstdio> #include "tuning/tuning.hpp" +#include "tuning/kernels/xaxpy.hpp" +#include "tuning/kernels/xdot.hpp" +#include "tuning/kernels/xgemv.hpp" +#include "tuning/kernels/xger.hpp" +#include "tuning/kernels/xgemm.hpp" +#include "tuning/kernels/xgemm_direct.hpp" #include "tuning/kernels/copy_fast.hpp" +#include "tuning/kernels/copy_pad.hpp" +#include "tuning/kernels/transpose_fast.hpp" +#include "tuning/kernels/transpose_pad.hpp" +#include "tuning/kernels/invert.hpp" namespace clblast { // 
================================================================================================= @@ -30,8 +39,8 @@ StatusCode TuneXaxpy(RawCommandQueue * queue, const size_t n, const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, XaxpyGetTunerDefaults, XaxpyGetTunerSettings<T>, + XaxpyTestValidArguments<T>, XaxpySetConstraints, XaxpySetArguments<T>, parameters); } template StatusCode TuneXaxpy<half>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneXaxpy<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -44,11 +53,11 @@ StatusCode TuneXdot(RawCommandQueue * queue, const size_t n, const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.n = n; auto queue_cpp = Queue(*queue); - auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + auto status = TunerAPI<T>(queue_cpp, args, 1, XdotGetTunerDefaults, XdotGetTunerSettings<T>, + XdotTestValidArguments<T>, XdotSetConstraints, XdotSetArguments<T>, parameters); if (status != StatusCode::kSuccess) { return status; } - return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 2, XdotGetTunerDefaults, XdotGetTunerSettings<T>, + XdotTestValidArguments<T>, XdotSetConstraints, XdotSetArguments<T>, parameters); } template StatusCode TuneXdot<half>(RawCommandQueue*, const size_t, const double, 
std::unordered_map<std::string,size_t>&); template StatusCode TuneXdot<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -61,14 +70,14 @@ StatusCode TuneXgemv(RawCommandQueue * queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + auto status = TunerAPI<T>(queue_cpp, args, 1, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>, + XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters); if (status != StatusCode::kSuccess) { return status; } - status = TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + status = TunerAPI<T>(queue_cpp, args, 2, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>, + XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters); if (status != StatusCode::kSuccess) { return status; } - return TunerAPI<T>(queue_cpp, args, 3, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 3, XgemvGetTunerDefaults, XgemvGetTunerSettings<T>, + XgemvTestValidArguments<T>, XgemvSetConstraints, XgemvSetArguments<T>, parameters); } template StatusCode TuneXgemv<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneXgemv<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -81,8 +90,8 @@ StatusCode TuneXger(RawCommandQueue * queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> 
¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, XgerGetTunerDefaults, XgerGetTunerSettings<T>, + XgerTestValidArguments<T>, XgerSetConstraints, XgerSetArguments<T>, parameters); } template StatusCode TuneXger<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneXger<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -95,8 +104,8 @@ StatusCode TuneXgemm(RawCommandQueue * queue, const size_t m, const size_t n, co const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 2, XgemmGetTunerDefaults, XgemmGetTunerSettings<T>, + XgemmTestValidArguments<T>, XgemmSetConstraints, XgemmSetArguments<T>, parameters); } template StatusCode TuneXgemm<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneXgemm<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -109,8 +118,8 @@ StatusCode TuneXgemmDirect(RawCommandQueue * queue, const size_t m, const size_t const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 2, 
GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 2, XgemmDirectGetTunerDefaults, XgemmDirectGetTunerSettings<T>, + XgemmDirectTestValidArguments<T>, XgemmDirectSetConstraints, XgemmDirectSetArguments<T>, parameters); } template StatusCode TuneXgemmDirect<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneXgemmDirect<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -123,8 +132,8 @@ StatusCode TuneCopy(RawCommandQueue * queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, CopyGetTunerDefaults, CopyGetTunerSettings<T>, + CopyTestValidArguments<T>, CopySetConstraints, CopySetArguments<T>, parameters); } template StatusCode TuneCopy<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneCopy<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -137,8 +146,8 @@ StatusCode TunePad(RawCommandQueue * queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> ¶meters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, 
PadGetTunerDefaults, PadGetTunerSettings<T>, + PadTestValidArguments<T>, PadSetConstraints, PadSetArguments<T>, parameters); } template StatusCode TunePad<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TunePad<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -151,8 +160,8 @@ StatusCode TuneTranspose(RawCommandQueue * queue, const size_t m, const size_t n const double fraction, std::unordered_map<std::string,size_t> &parameters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, TransposeGetTunerDefaults, TransposeGetTunerSettings<T>, + TransposeTestValidArguments<T>, TransposeSetConstraints, TransposeSetArguments<T>, parameters); } template StatusCode TuneTranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneTranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -165,8 +174,8 @@ StatusCode TunePadtranspose(RawCommandQueue * queue, const size_t m, const size_ const double fraction, std::unordered_map<std::string,size_t> &parameters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, PadtransposeGetTunerDefaults, PadtransposeGetTunerSettings<T>, + PadtransposeTestValidArguments<T>, PadtransposeSetConstraints, PadtransposeSetArguments<T>, parameters); } template StatusCode 
TunePadtranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TunePadtranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); @@ -179,8 +188,8 @@ StatusCode TuneInvert(RawCommandQueue * queue, const size_t m, const size_t n, c const double fraction, std::unordered_map<std::string,size_t> &parameters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; auto queue_cpp = Queue(*queue); - return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, - TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + return TunerAPI<T>(queue_cpp, args, 0, InvertGetTunerDefaults, InvertGetTunerSettings<T>, + InvertTestValidArguments<T>, InvertSetConstraints, InvertSetArguments<T>, parameters); } template StatusCode TuneInvert<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TuneInvert<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); |