From 2fe3fe15801f8ef11b38bfd93d7d68fbb37253a1 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Fri, 10 Jul 2015 07:18:12 +0200 Subject: The clients now distinguish between the memory and alpha/beta data-type --- test/performance/client.cc | 52 +++++++++++++++++++------------------ test/performance/client.h | 24 +++++++++-------- test/performance/routines/xaxpy.cc | 12 ++++++--- test/performance/routines/xgemm.cc | 12 ++++++--- test/performance/routines/xgemv.cc | 12 ++++++--- test/performance/routines/xsymm.cc | 12 ++++++--- test/performance/routines/xsyr2k.cc | 12 ++++++--- test/performance/routines/xsyrk.cc | 12 ++++++--- test/performance/routines/xtrmm.cc | 12 ++++++--- 9 files changed, 96 insertions(+), 64 deletions(-) diff --git a/test/performance/client.cc b/test/performance/client.cc index fad0f3a9..676e88e4 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -22,10 +22,10 @@ namespace clblast { // ================================================================================================= // Constructor -template -Client::Client(const Routine run_routine, const Routine run_reference, - const std::vector &options, - const GetMetric get_flops, const GetMetric get_bytes): +template +Client::Client(const Routine run_routine, const Routine run_reference, + const std::vector &options, + const GetMetric get_flops, const GetMetric get_bytes): run_routine_(run_routine), run_reference_(run_reference), options_(options), @@ -38,10 +38,10 @@ Client::Client(const Routine run_routine, const Routine run_reference, // Parses all arguments available for the CLBlast client testers. Some arguments might not be // applicable, but are searched for anyway to be able to create one common argument parser. All // arguments have a default value in case they are not found. -template -Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric default_a_ld, - const GetMetric default_b_ld, const GetMetric default_c_ld) { - auto args = Arguments{}; +template +Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric default_a_ld, + const GetMetric default_b_ld, const GetMetric default_c_ld) { + auto args = Arguments{}; auto help = std::string{"Options given/available:\n"}; // These are the options which are not for every client: they are optional @@ -75,8 +75,8 @@ Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric d if (o == kArgCOffset) { args.c_offset = GetArgument(argc, argv, help, kArgCOffset, size_t{0}); } // Scalar values - if (o == kArgAlpha) { args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar()); } - if (o == kArgBeta) { args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar()); } + if (o == kArgAlpha) { args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar()); } + if (o == kArgBeta) { args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar()); } } // These are the options common to all routines @@ -102,8 +102,8 @@ Arguments Client::ParseArguments(int argc, char *argv[], const GetMetric d // ================================================================================================= // This is main performance tester -template -void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) { +template +void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) { // Prints the header of the output table PrintTableHeader(args.silent, options_); @@ -174,10 +174,10 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) { // Creates a vector of timing results, filled with execution times of the 'main computation'. The // timing is performed using the milliseconds chrono functions. The function returns the minimum // value found in the vector of timing results. The return value is in milliseconds. -template -double Client::TimedExecution(const size_t num_runs, const Arguments &args, - const Buffers &buffers, CommandQueue &queue, - Routine run_blas, const std::string &library_name) { +template +double Client::TimedExecution(const size_t num_runs, const Arguments &args, + const Buffers &buffers, CommandQueue &queue, + Routine run_blas, const std::string &library_name) { auto timings = std::vector(num_runs); for (auto &timing: timings) { auto start_time = std::chrono::steady_clock::now(); @@ -198,8 +198,8 @@ double Client::TimedExecution(const size_t num_runs, const Arguments &args // ================================================================================================= // Prints the header of the performance table -template -void Client::PrintTableHeader(const bool silent, const std::vector &args) { +template +void Client::PrintTableHeader(const bool silent, const std::vector &args) { if (!silent) { for (auto i=size_t{0}; i | <-- clBLAS --> |\n"); @@ -210,9 +210,9 @@ void Client::PrintTableHeader(const bool silent, const std::vector -void Client::PrintTableRow(const Arguments& args, const double ms_clblast, - const double ms_clblas) { +template +void Client::PrintTableRow(const Arguments& args, const double ms_clblast, + const double ms_clblas) { // Creates a vector of relevant variables auto integers = std::vector{}; @@ -276,10 +276,12 @@ void Client::PrintTableRow(const Arguments& args, const double ms_clblast, // ================================================================================================= // Compiles the templated class -template class Client; -template class Client; -template class Client; -template class Client; +template class Client; +template class Client; +template class Client; +template class Client; +template class Client; +template class Client; // ================================================================================================= } // namespace clblast diff --git a/test/performance/client.h b/test/performance/client.h index f9f219d0..c9095967 100644 --- a/test/performance/client.h +++ b/test/performance/client.h @@ -10,6 +10,8 @@ // This class implements the performance-test client. It is generic for all CLBlast routines by // taking a number of routine-specific functions as arguments, such as how to compute buffer sizes // or how to get the FLOPS count. +// Typename T: the data-type of the routine's memory buffers (==precision) +// Typename U: the data-type of the alpha and beta arguments // // This file also provides the common interface to the performance client (see the 'RunClient' // function for details). @@ -32,7 +34,7 @@ namespace clblast { // ================================================================================================= // See comment at top of file for a description of the class -template +template class Client { public: @@ -40,9 +42,9 @@ class Client { const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; // Shorthand for the routine-specific functions passed to the tester - using Routine = std::function&, const Buffers&, CommandQueue&)>; - using SetMetric = std::function&)>; - using GetMetric = std::function&)>; + using Routine = std::function&, const Buffers&, CommandQueue&)>; + using SetMetric = std::function&)>; + using GetMetric = std::function&)>; // The constructor Client(const Routine run_routine, const Routine run_reference, @@ -51,24 +53,24 @@ class Client { // Parses all command-line arguments, filling in the arguments structure. If no command-line // argument is given for a particular argument, it is filled in with a default value. - Arguments ParseArguments(int argc, char *argv[], const GetMetric default_a_ld, + Arguments ParseArguments(int argc, char *argv[], const GetMetric default_a_ld, const GetMetric default_b_ld, const GetMetric default_c_ld); // The main client function, setting-up arguments, matrices, OpenCL buffers, etc. After set-up, it // calls the client routines. - void PerformanceTest(Arguments &args, const SetMetric set_sizes); + void PerformanceTest(Arguments &args, const SetMetric set_sizes); private: // Runs a function a given number of times and returns the execution time of the shortest instance - double TimedExecution(const size_t num_runs, const Arguments &args, const Buffers &buffers, + double TimedExecution(const size_t num_runs, const Arguments &args, const Buffers &buffers, CommandQueue &queue, Routine run_blas, const std::string &library_name); // Prints the header of a performance-data table void PrintTableHeader(const bool silent, const std::vector &args); // Prints a row of performance data, including results of two libraries - void PrintTableRow(const Arguments& args, const double ms_clblast, const double ms_clblas); + void PrintTableRow(const Arguments& args, const double ms_clblast, const double ms_clblas); // The routine-specific functions passed to the tester const Routine run_routine_; @@ -82,12 +84,12 @@ class Client { // The interface to the performance client. This is a separate function in the header such that it // is automatically compiled for each routine, templated by the parameter "C". -template +template void RunClient(int argc, char *argv[]) { // Creates a new client - auto client = Client(C::RunRoutine, C::RunReference, C::GetOptions(), - C::GetFlops, C::GetBytes); + auto client = Client(C::RunRoutine, C::RunReference, C::GetOptions(), + C::GetFlops, C::GetBytes); // Simple command line argument parser with defaults auto args = client.ParseArguments(argc, argv, C::DefaultLDA, C::DefaultLDB, C::DefaultLDC); diff --git a/test/performance/routines/xaxpy.cc b/test/performance/routines/xaxpy.cc index 3ced80ed..6a2b96c1 100644 --- a/test/performance/routines/xaxpy.cc +++ b/test/performance/routines/xaxpy.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xgemm.cc b/test/performance/routines/xgemm.cc index 36c74b9a..9a02e595 100644 --- a/test/performance/routines/xgemm.cc +++ b/test/performance/routines/xgemm.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xgemv.cc b/test/performance/routines/xgemv.cc index 183dd4a1..6f69ef21 100644 --- a/test/performance/routines/xgemv.cc +++ b/test/performance/routines/xgemv.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xsymm.cc b/test/performance/routines/xsymm.cc index 0c7f5e1e..8738ceda 100644 --- a/test/performance/routines/xsymm.cc +++ b/test/performance/routines/xsymm.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xsyr2k.cc b/test/performance/routines/xsyr2k.cc index 63b50df6..e4c76229 100644 --- a/test/performance/routines/xsyr2k.cc +++ b/test/performance/routines/xsyr2k.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xsyrk.cc b/test/performance/routines/xsyrk.cc index 9022d4f8..53fecb69 100644 --- a/test/performance/routines/xsyrk.cc +++ b/test/performance/routines/xsyrk.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/xtrmm.cc b/test/performance/routines/xtrmm.cc index 91dcbd07..2ab9ce77 100644 --- a/test/performance/routines/xtrmm.cc +++ b/test/performance/routines/xtrmm.cc @@ -16,19 +16,23 @@ // ================================================================================================= +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv)) { case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kSingle: - clblast::RunClient, float>(argc, argv); break; + clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: - clblast::RunClient, double>(argc, argv); break; + clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, clblast::float2>(argc, argv); break; + clblast::RunClient, float2, float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, clblast::double2>(argc, argv); break; + clblast::RunClient, double2, double2>(argc, argv); break; } return 0; } -- cgit v1.2.3