// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file tests any CLBlast routine. It contains two types of tests: one testing all sorts of // input combinations, and one deliberatly testing with invalid values. // Typename T: the data-type of the routine's memory buffers (==precision) // Typename U: the data-type of the alpha and beta arguments // // ================================================================================================= #ifndef CLBLAST_TEST_CORRECTNESS_TESTBLAS_H_ #define CLBLAST_TEST_CORRECTNESS_TESTBLAS_H_ #include #include #include #include "test/correctness/tester.hpp" namespace clblast { // ================================================================================================= // See comment at top of file for a description of the class template class TestBlas: public Tester { public: static const int kSeed; // Uses several variables from the Tester class using Tester::context_; using Tester::queue_; using Tester::full_test_; using Tester::verbose_; using Tester::device_; using Tester::compare_clblas_; using Tester::compare_cblas_; // Uses several helper functions from the Tester class using Tester::TestStart; using Tester::TestEnd; using Tester::TestErrorCount; using Tester::TestErrorCodes; using Tester::GetOffsets; using Tester::GetOptionsString; using Tester::GetSizesString; // Test settings for the regular test. Append to these lists in case more tests are required. static const std::vector kVectorDims; static const std::vector kIncrements; static const std::vector kMatrixDims; static const std::vector kMatrixVectorDims; static const std::vector kBandSizes; static const std::vector kPadSizes; static const std::vector kDilationSizes; static const std::vector kKernelSizes; static const std::vector kBatchCounts; static const std::vector kNumKernels; static const std::vector kStrideValues; static const std::vector kChannelValues; static const std::vector kKernelModes; const std::vector kOffsets; const std::vector kAlphaValues; const std::vector kBetaValues; // Test settings for the invalid tests static const std::vector kInvalidIncrements; static const size_t kBufferSize; static const std::vector kMatSizes; static const std::vector kVecSizes; // The layout/transpose/triangle options to test with static const std::vector kLayouts; static const std::vector kTriangles; static const std::vector kSides; static const std::vector kDiagonals; static const std::vector kTransposes; // Data-type dependent, see .cpp-file // Shorthand for the routine-specific functions passed to the tester using DataPrepare = std::function&, Queue&, const int, std::vector&, std::vector&, std::vector&, std::vector&, std::vector&, std::vector&, std::vector&)>; using Routine = std::function&, Buffers&, Queue&)>; using ResultGet = std::function(const Arguments&, Buffers&, Queue&)>; using ResultIndex = std::function&, const size_t, const size_t)>; using ResultIterator = std::function&)>; // Constructor, initializes the base class tester and input data TestBlas(const std::vector &arguments, const bool silent, const std::string &name, const std::vector &options, const DataPrepare prepare_data, const Routine run_routine, const Routine run_reference1, const Routine run_reference2, const ResultGet get_result, const ResultIndex get_index, const ResultIterator get_id1, const ResultIterator get_id2); // The test functions, taking no inputs void TestRegular(std::vector> &test_vector, const std::string &name); void TestInvalid(std::vector> &test_vector, const std::string &name); private: // Source data to test with std::vector x_source_; std::vector y_source_; std::vector a_source_; std::vector b_source_; std::vector c_source_; std::vector ap_source_; std::vector scalar_source_; // The routine-specific functions passed to the tester const DataPrepare prepare_data_; const Routine run_routine_; const Routine run_reference1_; const Routine run_reference2_; const ResultGet get_result_; const ResultIndex get_index_; const ResultIterator get_id1_; const ResultIterator get_id2_; }; // ================================================================================================= template const int TestBlas::kSeed = 42; // fixed seed for reproducibility // Test settings for the regular test. Append to these lists in case more tests are required. template const std::vector TestBlas::kVectorDims = { 7, 93, 144, 4096 }; template const std::vector TestBlas::kIncrements = { 1, 2, 7 }; template const std::vector TestBlas::kMatrixDims = { 7, 64 }; template const std::vector TestBlas::kMatrixVectorDims = { 61, 256 }; template const std::vector TestBlas::kBandSizes = { 4, 19 }; template const std::vector TestBlas::kBatchCounts = { 1, 3 }; template const std::vector TestBlas::kPadSizes = { 0, 1 }; template const std::vector TestBlas::kDilationSizes = { 1, 2 }; template const std::vector TestBlas::kKernelSizes = { 1, 3 }; template const std::vector TestBlas::kNumKernels = { 1, 6 }; template const std::vector TestBlas::kStrideValues = { 1, 3 }; template const std::vector TestBlas::kChannelValues = { 1, 2 }; template const std::vector TestBlas::kKernelModes = { KernelMode::kCrossCorrelation, KernelMode::kConvolution }; // Test settings for the invalid tests template const std::vector TestBlas::kInvalidIncrements = { 0, 1 }; template const size_t TestBlas::kBufferSize = 64; template const std::vector TestBlas::kMatSizes = {0, kBufferSize*kBufferSize-1, kBufferSize*kBufferSize}; template const std::vector TestBlas::kVecSizes = {0, kBufferSize - 1, kBufferSize}; // The layout/triangle options to test with template const std::vector TestBlas::kLayouts = {Layout::kRowMajor, Layout::kColMajor}; template const std::vector TestBlas::kTriangles = {Triangle::kUpper, Triangle::kLower}; template const std::vector TestBlas::kSides = {Side::kLeft, Side::kRight}; template const std::vector TestBlas::kDiagonals = {Diagonal::kUnit, Diagonal::kNonUnit}; // The transpose configurations to test with: template parameter dependent, see .cpp file for implementation template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; template <> const std::vector TestBlas::kTransposes; // ================================================================================================= // Bogus reference function, in case a comparison library is not available template static StatusCode ReferenceNotAvailable(const Arguments &, BufferType &, Queue &) { return StatusCode::kNotImplemented; } // Helper for the below function: MSVC's C1061 error requires part of the for-loops to be in a // separate file. This part handles the im2col/xconv arguments. template void handle_remaining_of_options(std::vector> ®ular_test_vector, Arguments &r_args, TestBlas &tester, const std::vector &kernel_modes, const std::vector &channelss, const std::vector &heights, const std::vector &widths, const std::vector &kernel_hs, const std::vector &kernel_ws, const std::vector &pad_hs, const std::vector &pad_ws, const std::vector &stride_hs, const std::vector &stride_ws, const std::vector &dilation_hs, const std::vector &dilation_ws, const std::vector &batch_counts, const std::vector &num_kernelss) { for (auto &kernel_mode: kernel_modes) { r_args.kernel_mode = kernel_mode; for (auto &channels: channelss) { r_args.channels = channels; for (auto &height: heights) { r_args.height = height; for (auto &width: widths) { r_args.width = width; for (auto &kernel_h: kernel_hs) { r_args.kernel_h = kernel_h; for (auto &kernel_w: kernel_ws) { r_args.kernel_w = kernel_w; for (auto &pad_h: pad_hs) { r_args.pad_h = pad_h; for (auto &pad_w: pad_ws) { r_args.pad_w = pad_w; for (auto &stride_h: stride_hs) { r_args.stride_h = stride_h; for (auto &stride_w: stride_ws) { r_args.stride_w = stride_w; for (auto &dilation_h: dilation_hs) { r_args.dilation_h = dilation_h; for (auto &dilation_w: dilation_ws) { r_args.dilation_w = dilation_w; for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count; for (auto &num_kernels: num_kernelss) { r_args.num_kernels = num_kernels; C::SetSizes(r_args, tester.queue_); regular_test_vector.push_back(r_args); } } } } } } } } } } } } } } } // The interface to the correctness tester. This is a separate function in the header such that it // is automatically compiled for each routine, templated by the parameter "C". template size_t RunTests(int argc, char *argv[], const bool silent, const std::string &name) { auto command_line_args = RetrieveCommandLineArguments(argc, argv); // Sets the clBLAS reference to test against #ifdef CLBLAST_REF_CLBLAS auto reference_routine1 = C::RunReference1; // clBLAS when available #else auto reference_routine1 = ReferenceNotAvailable>; #endif // Sets the CBLAS reference to test against #ifdef CLBLAST_REF_CBLAS auto reference_routine2 = [](const Arguments &args, Buffers &buffers, Queue &queue) -> StatusCode { auto buffers_host = BuffersHost(); DeviceToHost(args, buffers, buffers_host, queue, C::BuffersIn()); C::RunReference2(args, buffers_host, queue); HostToDevice(args, buffers, buffers_host, queue, C::BuffersOut()); return StatusCode::kSuccess; }; #else auto reference_routine2 = ReferenceNotAvailable>; #endif // Non-BLAS routines cannot be fully tested if (!silent && C::BLASLevel() == 4) { fprintf(stdout, "\n* NOTE: This non-BLAS routine is tested against a custom implementation,\n"); fprintf(stdout, " not against clBLAS or a CPU BLAS library. Thus, the arguments '-clblas'\n"); fprintf(stdout, " and '-cblas' have no effect.\n"); } // Creates a tester auto options = C::GetOptions(); TestBlas tester{command_line_args, silent, name, options, C::PrepareData, C::RunRoutine, reference_routine1, reference_routine2, C::DownloadResult, C::GetResultIndex, C::ResultID1, C::ResultID2}; // This variable holds the arguments relevant for this routine auto args = Arguments{}; // Initializes the vectors with a single element. If this particular option is relevant for this // routine, this vector is overridden. Otherwise, it is unused - the value here does not matter. auto ms = std::vector{args.m}; auto ns = std::vector{args.n}; auto ks = std::vector{args.k}; auto kus = std::vector{args.ku}; auto kls = std::vector{args.kl}; auto layouts = std::vector{args.layout}; auto a_transposes = std::vector{args.a_transpose}; auto b_transposes = std::vector{args.b_transpose}; auto sides = std::vector{args.side}; auto triangles = std::vector{args.triangle}; auto diagonals = std::vector{args.diagonal}; auto x_incs = std::vector{args.x_inc}; auto y_incs = std::vector{args.y_inc}; auto x_offsets = std::vector{args.x_offset}; auto y_offsets = std::vector{args.y_offset}; auto a_lds = std::vector{args.a_ld}; auto b_lds = std::vector{args.b_ld}; auto c_lds = std::vector{args.c_ld}; auto a_offsets = std::vector{args.a_offset}; auto b_offsets = std::vector{args.b_offset}; auto c_offsets = std::vector{args.c_offset}; auto ap_offsets = std::vector{args.ap_offset}; auto dot_offsets = std::vector{args.dot_offset}; auto nrm2_offsets = std::vector{args.nrm2_offset}; auto asum_offsets = std::vector{args.asum_offset}; auto imax_offsets = std::vector{args.imax_offset}; auto alphas = std::vector{args.alpha}; auto betas = std::vector{args.beta}; auto kernel_modes = std::vector{args.kernel_mode}; auto channelss = std::vector{args.channels}; auto heights = std::vector{args.height}; auto widths = std::vector{args.width}; auto kernel_hs = std::vector{args.kernel_h}; auto kernel_ws = std::vector{args.kernel_w}; auto pad_hs = std::vector{args.pad_h}; auto pad_ws = std::vector{args.pad_w}; auto stride_hs = std::vector{args.stride_h}; auto stride_ws = std::vector{args.stride_w}; auto dilation_hs = std::vector{args.dilation_h}; auto dilation_ws = std::vector{args.dilation_w}; auto batch_counts = std::vector{args.batch_count}; auto num_kernelss = std::vector{args.num_kernels}; auto x_sizes = std::vector{args.x_size}; auto y_sizes = std::vector{args.y_size}; auto a_sizes = std::vector{args.a_size}; auto b_sizes = std::vector{args.b_size}; auto c_sizes = std::vector{args.c_size}; auto ap_sizes = std::vector{args.ap_size}; // Sets the dimensions of the matrices or vectors depending on the BLAS level auto dimensions = (C::BLASLevel() == 4) ? tester.kMatrixDims : // non-BLAS extra routines (C::BLASLevel() == 3) ? tester.kMatrixDims : // level 3 (C::BLASLevel() == 2) ? tester.kMatrixVectorDims : // level 2 tester.kVectorDims; // else: level 1 // For the options relevant to this routine, sets the vectors to proper values for (auto &option: options) { if (option == kArgM) { ms = dimensions; } if (option == kArgN) { ns = dimensions; } if (option == kArgK) { ks = dimensions; } if (option == kArgKU) { kus = tester.kBandSizes; } if (option == kArgKL) { kls = tester.kBandSizes; } if (option == kArgLayout) { layouts = tester.kLayouts; } if (option == kArgATransp) { a_transposes = C::GetATransposes(tester.kTransposes); } if (option == kArgBTransp) { b_transposes = C::GetBTransposes(tester.kTransposes); } if (option == kArgSide) { sides = tester.kSides; } if (option == kArgTriangle) { triangles = tester.kTriangles; } if (option == kArgDiagonal) { diagonals = tester.kDiagonals; } if (option == kArgXInc) { x_incs = tester.kIncrements; } if (option == kArgYInc) { y_incs = tester.kIncrements; } if (option == kArgXOffset) { x_offsets = tester.kOffsets; } if (option == kArgYOffset) { y_offsets = tester.kOffsets; } if (option == kArgALeadDim) { a_lds = dimensions; } if (option == kArgBLeadDim) { b_lds = dimensions; } if (option == kArgCLeadDim) { c_lds = dimensions; } if (option == kArgAOffset) { a_offsets = tester.kOffsets; } if (option == kArgBOffset) { b_offsets = tester.kOffsets; } if (option == kArgCOffset) { c_offsets = tester.kOffsets; } if (option == kArgAPOffset) { ap_offsets = tester.kOffsets; } if (option == kArgDotOffset) { dot_offsets = tester.kOffsets; } if (option == kArgNrm2Offset) { nrm2_offsets = tester.kOffsets; } if (option == kArgAsumOffset) { asum_offsets = tester.kOffsets; } if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; } if (option == kArgAlpha) { alphas = tester.kAlphaValues; } if (option == kArgBeta) { betas = tester.kBetaValues; } if (option == kArgKernelMode) { kernel_modes = tester.kKernelModes; } if (option == kArgChannels) { channelss = tester.kChannelValues; } if (option == kArgHeight) { heights = tester.kMatrixDims; } if (option == kArgWidth) { widths = tester.kMatrixDims; } if (option == kArgKernelH) { kernel_hs = tester.kKernelSizes; } if (option == kArgKernelW) { kernel_ws = tester.kKernelSizes; } if (option == kArgPadH) { pad_hs = tester.kPadSizes; } if (option == kArgPadW) { pad_ws = tester.kPadSizes; } if (option == kArgStrideH) { stride_hs = tester.kStrideValues; } if (option == kArgStrideW) { stride_ws = tester.kStrideValues; } if (option == kArgDilationH) { dilation_hs = tester.kDilationSizes; } if (option == kArgDilationW) { dilation_ws = tester.kDilationSizes; } if (option == kArgBatchCount) { batch_counts = tester.kBatchCounts; } if (option == kArgNumKernels) { num_kernelss = tester.kNumKernels; } if (option == kArgXOffset) { x_sizes = tester.kVecSizes; } if (option == kArgYOffset) { y_sizes = tester.kVecSizes; } if (option == kArgAOffset) { a_sizes = tester.kMatSizes; } if (option == kArgBOffset) { b_sizes = tester.kMatSizes; } if (option == kArgCOffset) { c_sizes = tester.kMatSizes; } if (option == kArgAPOffset) { ap_sizes = tester.kMatSizes; } } // Loops over the test-cases from a data-layout point of view for (auto &layout: layouts) { args.layout = layout; for (auto &a_transpose: a_transposes) { args.a_transpose = a_transpose; for (auto &b_transpose: b_transposes) { args.b_transpose = b_transpose; for (auto &side: sides) { args.side = side; for (auto &triangle: triangles) { args.triangle = triangle; for (auto &diagonal: diagonals) { args.diagonal = diagonal; // Creates the arguments vector for the regular tests auto regular_test_vector = std::vector>{}; auto r_args = args; for (auto &m: ms) { r_args.m = m; for (auto &n: ns) { r_args.n = n; for (auto &k: ks) { r_args.k = k; for (auto &ku: kus) { r_args.ku = ku; for (auto &kl: kls) { r_args.kl = kl; for (auto &x_inc: x_incs) { r_args.x_inc = x_inc; for (auto &x_offset: x_offsets) { r_args.x_offset = x_offset; for (auto &y_inc: y_incs) { r_args.y_inc = y_inc; for (auto &y_offset: y_offsets) { r_args.y_offset = y_offset; for (auto &a_ld: a_lds) { r_args.a_ld = a_ld; for (auto &a_offset: a_offsets) { r_args.a_offset = a_offset; for (auto &b_ld: b_lds) { r_args.b_ld = b_ld; for (auto &b_offset: b_offsets) { r_args.b_offset = b_offset; for (auto &c_ld: c_lds) { r_args.c_ld = c_ld; for (auto &c_offset: c_offsets) { r_args.c_offset = c_offset; for (auto &ap_offset: ap_offsets) { r_args.ap_offset = ap_offset; for (auto &dot_offset: dot_offsets) { r_args.dot_offset = dot_offset; for (auto &nrm2_offset: nrm2_offsets) { r_args.nrm2_offset = nrm2_offset; for (auto &asum_offset: asum_offsets) { r_args.asum_offset = asum_offset; for (auto &imax_offset: imax_offsets) { r_args.imax_offset = imax_offset; for (auto &alpha: alphas) { r_args.alpha = alpha; for (auto &beta: betas) { r_args.beta = beta; // Cannot have more for-loops because of MSVC's C1061 error handle_remaining_of_options(regular_test_vector, r_args, tester, kernel_modes, channelss, heights, widths, kernel_hs, kernel_ws, pad_hs, pad_ws, stride_hs, stride_ws, dilation_hs, dilation_ws, batch_counts, num_kernelss); } } } } } } } } } } } } } } } } } } } } } } // Creates the arguments vector for the invalid-buffer tests #ifdef CLBLAST_REF_CLBLAS auto invalid_test_vector = std::vector>{}; auto i_args = args; i_args.m = i_args.n = i_args.k = i_args.kl = i_args.ku = tester.kBufferSize; i_args.a_ld = i_args.b_ld = i_args.c_ld = tester.kBufferSize; i_args.batch_count = 3; i_args.alphas = std::vector(i_args.batch_count); i_args.betas = std::vector(i_args.batch_count); i_args.a_offsets = std::vector(i_args.batch_count); i_args.b_offsets = std::vector(i_args.batch_count); i_args.c_offsets = std::vector(i_args.batch_count); for (auto &x_size: x_sizes) { i_args.x_size = x_size; for (auto &y_size: y_sizes) { i_args.y_size = y_size; for (auto &a_size: a_sizes) { i_args.a_size = a_size; for (auto &b_size: b_sizes) { i_args.b_size = b_size; for (auto &c_size: c_sizes) { i_args.c_size = c_size; for (auto &ap_size: ap_sizes) { i_args.ap_size = ap_size; invalid_test_vector.push_back(i_args); } } } } } } #endif // Sets the name of this test-case auto names = std::vector{}; for (auto &option: options) { if (option == kArgLayout) { names.push_back(ToString(layout)); } if (option == kArgATransp) { names.push_back(ToString(a_transpose)); } if (option == kArgBTransp) { names.push_back(ToString(b_transpose)); } if (option == kArgSide) { names.push_back(ToString(side)); } if (option == kArgTriangle) { names.push_back(ToString(triangle)); } if (option == kArgDiagonal) { names.push_back(ToString(diagonal)); } } if (names.size() == 0) { names.push_back("default"); } auto case_name = std::string{}; for (auto i=size_t{0}; i