// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This class implements the performance-test client. It is generic for all CLBlast routines by // taking a number of routine-specific functions as arguments, such as how to compute buffer sizes // or how to get the FLOPS count. // Typename T: the data-type of the routine's memory buffers (==precision) // Typename U: the data-type of the alpha and beta arguments // // This file also provides the common interface to the performance client (see the 'RunClient' // function for details). // // ================================================================================================= #ifndef CLBLAST_TEST_PERFORMANCE_CLIENT_H_ #define CLBLAST_TEST_PERFORMANCE_CLIENT_H_ #include #include #include #include "test/test_utilities.hpp" // The libraries to test #ifdef CLBLAST_REF_CLBLAS #include #endif #include "test/wrapper_cuda.hpp" #include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= // See comment at top of file for a description of the class template class Client { public: static const int kSeed; struct TimeResult { double minimum; double maximum; double mean; double standard_deviation; }; // Shorthand for the routine-specific functions passed to the tester using Routine = std::function&, Buffers&, Queue&)>; using Reference1 = std::function&, Buffers&, Queue&)>; using Reference2 = std::function&, BuffersHost&, Queue&)>; using Reference3 = std::function&, BuffersCUDA&, Queue&)>; using SetMetric = std::function&, Queue&)>; using GetMetric = std::function&)>; // The constructor Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2, const Reference3 run_reference3, const std::vector &options, const std::vector &buffers_in, const std::vector &buffers_out, const GetMetric get_flops, const GetMetric get_bytes); // Parses all command-line arguments, filling in the arguments structure. If no command-line // argument is given for a particular argument, it is filled in with a default value. Arguments ParseArguments(int argc, char *argv[], const size_t level, const GetMetric default_a_ld, const GetMetric default_b_ld, const GetMetric default_c_ld); // The main client function, setting-up arguments, matrices, OpenCL buffers, etc. After set-up, it // calls the client routines. void PerformanceTest(Arguments &args, const SetMetric set_sizes); private: // Runs a function a given number of times and returns the execution time of the shortest instance template TimeResult TimedExecution(const size_t num_runs, const Arguments &args, BufferType &buffers, Queue &queue, RoutineType run_blas, const std::string &library_name); // Prints the header of a performance-data table void PrintTableHeader(const Arguments& args); // Prints a row of performance data, including results of two libraries void PrintTableRow(const Arguments& args, const std::vector>& timings); // The routine-specific functions passed to the tester const Routine run_routine_; const Reference1 run_reference1_; const Reference2 run_reference2_; const Reference3 run_reference3_; const std::vector options_; const std::vector buffers_in_; const std::vector buffers_out_; const GetMetric get_flops_; const GetMetric get_bytes_; // Extra arguments bool warm_up_; // if enabled, do a warm-up run first before measuring execution time }; // ================================================================================================= // Bogus reference function, in case a comparison library is not available template static StatusCode ReferenceNotAvailable(const Arguments &, BufferType &, Queue &) { return StatusCode::kNotImplemented; } // The interface to the performance client. This is a separate function in the header such that it // is automatically compiled for each routine, templated by the parameter "C". template void RunClient(int argc, char *argv[]) { // Sets the reference to test against #ifdef CLBLAST_REF_CLBLAS auto reference1 = C::RunReference1; // clBLAS when available #else auto reference1 = ReferenceNotAvailable>; #endif #ifdef CLBLAST_REF_CBLAS auto reference2 = C::RunReference2; // CBLAS when available #else auto reference2 = ReferenceNotAvailable>; #endif #ifdef CLBLAST_REF_CUBLAS auto reference3 = C::RunReference3; // cuBLAS when available #else auto reference3 = ReferenceNotAvailable>; #endif // Creates a new client auto client = Client(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(), C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes); // Simple command line argument parser with defaults auto args = client.ParseArguments(argc, argv, C::BLASLevel(), C::DefaultLDA, C::DefaultLDB, C::DefaultLDC); if (args.print_help) { return; } // Runs the client client.PerformanceTest(args, C::SetSizes); } // ================================================================================================= } // namespace clblast // CLBLAST_TEST_PERFORMANCE_CLIENT_H_ #endif