diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/correctness/misc/override_parameters.cpp | 15 | ||||
-rw-r--r-- | test/correctness/routines/levelx/xaxpybatched.cpp | 30 | ||||
-rw-r--r-- | test/correctness/testblas.cpp | 42 | ||||
-rw-r--r-- | test/correctness/testblas.hpp | 9 | ||||
-rw-r--r-- | test/correctness/tester.cpp | 1 | ||||
-rw-r--r-- | test/performance/client.cpp | 26 | ||||
-rw-r--r-- | test/performance/routines/levelx/xaxpybatched.cpp | 37 | ||||
-rw-r--r-- | test/routines/levelx/xaxpybatched.hpp | 168 | ||||
-rw-r--r-- | test/routines/levelx/xinvert.hpp | 2 |
9 files changed, 296 insertions, 34 deletions
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp index a4cecf0d..e6eebef7 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -11,11 +11,14 @@ // // ================================================================================================= +#include <string> +#include <vector> +#include <unordered_map> +#include <random> + #include "utilities/utilities.hpp" #include "test/routines/level3/xgemm.hpp" -#include <unordered_map> - namespace clblast { // ================================================================================================= @@ -71,9 +74,11 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st auto host_a = std::vector<T>(args.m * args.k); auto host_b = std::vector<T>(args.n * args.k); auto host_c = std::vector<T>(args.m * args.n); - PopulateVector(host_a, kSeed); - PopulateVector(host_b, kSeed); - PopulateVector(host_c, kSeed); + std::mt19937 mt(kSeed); + std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); + PopulateVector(host_a, mt, dist); + PopulateVector(host_b, mt, dist); + PopulateVector(host_c, mt, dist); // Copy the matrices to the device auto device_a = Buffer<T>(context, host_a.size()); diff --git a/test/correctness/routines/levelx/xaxpybatched.cpp b/test/correctness/routines/levelx/xaxpybatched.cpp new file mode 100644 index 00000000..a106440f --- /dev/null +++ b/test/correctness/routines/levelx/xaxpybatched.cpp @@ -0,0 +1,30 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "test/correctness/testblas.hpp" +#include "test/routines/levelx/xaxpybatched.hpp" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + auto errors = size_t{0}; + errors += clblast::RunTests<clblast::TestXaxpyBatched<float>, float, float>(argc, argv, false, "SAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<double>, double, double>(argc, argv, true, "DAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv, true, "CAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv, true, "ZAXPYBATCHED"); + errors += clblast::RunTests<clblast::TestXaxpyBatched<half>, half, half>(argc, argv, true, "HAXPYBATCHED"); + if (errors > 0) { return 1; } else { return 0; } +} + +// ================================================================================================= diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index d959ce18..56376d0b 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -13,7 +13,9 @@ #include <algorithm> #include <iostream> +#include <random> +#include "utilities/utilities.hpp" #include "test/correctness/testblas.hpp" namespace clblast { @@ -25,6 +27,7 @@ template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kIncr template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixDims = { 7, 64 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kMatrixVectorDims = { 61, 256 }; template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kBandSizes = { 4, 19 }; +template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kBatchCounts = { 1, 3 }; // Test settings for the invalid tests template <typename T, typename U> const std::vector<size_t> TestBlas<T,U>::kInvalidIncrements = { 0, 1 }; @@ -79,22 +82,25 @@ TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool si const auto max_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end()); const auto max_matvec = *std::max_element(kMatrixVectorDims.begin(), kMatrixVectorDims.end()); const auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end()); + const auto max_batch_count = *std::max_element(kBatchCounts.begin(), kBatchCounts.end()); // Creates test input data - x_source_.resize(std::max(max_vec, max_matvec)*max_inc + max_offset); - y_source_.resize(std::max(max_vec, max_matvec)*max_inc + max_offset); - a_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - b_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - c_source_.resize(std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); - ap_source_.resize(std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset); - scalar_source_.resize(std::max(max_mat, max_matvec) + max_offset); - PopulateVector(x_source_, kSeed); - PopulateVector(y_source_, kSeed); - PopulateVector(a_source_, kSeed); - PopulateVector(b_source_, kSeed); - PopulateVector(c_source_, kSeed); - PopulateVector(ap_source_, kSeed); - PopulateVector(scalar_source_, kSeed); + x_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); + y_source_.resize(max_batch_count * std::max(max_vec, max_matvec)*max_inc + max_offset); + a_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + b_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + c_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_ld, max_matvec) + max_offset); + ap_source_.resize(max_batch_count * std::max(max_mat, max_matvec)*std::max(max_mat, max_matvec) + max_offset); + scalar_source_.resize(max_batch_count * std::max(max_mat, max_matvec) + max_offset); + std::mt19937 mt(kSeed); + std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); + PopulateVector(x_source_, mt, dist); + PopulateVector(y_source_, mt, dist); + PopulateVector(a_source_, mt, dist); + PopulateVector(b_source_, mt, dist); + PopulateVector(c_source_, mt, dist); + PopulateVector(ap_source_, mt, dist); + PopulateVector(scalar_source_, mt, dist); } // =============================================================================================== @@ -190,15 +196,15 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st auto result2 = get_result_(args, buffers2, queue_); // Computes the L2 error - const auto kErrorMarginL2 = getL2ErrorMargin<T>(); auto l2error = 0.0; + const auto kErrorMarginL2 = getL2ErrorMargin<T>(); for (auto id1=size_t{0}; id1<get_id1_(args); ++id1) { for (auto id2=size_t{0}; id2<get_id2_(args); ++id2) { auto index = get_index_(args, id1, id2); l2error += SquaredDifference(result1[index], result2[index]); } } - l2error /= (get_id1_(args) * get_id2_(args)); + l2error /= static_cast<double>(get_id1_(args) * get_id2_(args)); // Checks for differences in the output auto errors = size_t{0}; @@ -219,8 +225,10 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st } } } + + // Report the results if (verbose_ && errors > 0) { - fprintf(stdout, "\n Combined L2 error: %.2e\n ", l2error); + fprintf(stdout, "\n Combined average L2 error: %.2e\n ", l2error); } // Tests the error count (should be zero) diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp index ee795aad..42e8aef7 100644 --- a/test/correctness/testblas.hpp +++ b/test/correctness/testblas.hpp @@ -56,6 +56,7 @@ class TestBlas: public Tester<T,U> { static const std::vector<size_t> kMatrixDims; static const std::vector<size_t> kMatrixVectorDims; static const std::vector<size_t> kBandSizes; + static const std::vector<size_t> kBatchCounts; const std::vector<size_t> kOffsets; const std::vector<U> kAlphaValues; const std::vector<U> kBetaValues; @@ -183,6 +184,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na auto imax_offsets = std::vector<size_t>{args.imax_offset}; auto alphas = std::vector<U>{args.alpha}; auto betas = std::vector<U>{args.beta}; + auto batch_counts = std::vector<size_t>{args.batch_count}; auto x_sizes = std::vector<size_t>{args.x_size}; auto y_sizes = std::vector<size_t>{args.y_size}; auto a_sizes = std::vector<size_t>{args.a_size}; @@ -226,6 +228,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; } if (option == kArgAlpha) { alphas = tester.kAlphaValues; } if (option == kArgBeta) { betas = tester.kBetaValues; } + if (option == kArgBatchCount) { batch_counts = tester.kBatchCounts; } if (option == kArgXOffset) { x_sizes = tester.kVecSizes; } if (option == kArgYOffset) { y_sizes = tester.kVecSizes; } @@ -268,8 +271,10 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na for (auto &imax_offset: imax_offsets) { r_args.imax_offset = imax_offset; for (auto &alpha: alphas) { r_args.alpha = alpha; for (auto &beta: betas) { r_args.beta = beta; - C::SetSizes(r_args); - regular_test_vector.push_back(r_args); + for (auto &batch_count: batch_counts) { r_args.batch_count = batch_count; + C::SetSizes(r_args); + regular_test_vector.push_back(r_args); + } } } } diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index cbfc5bb2..40784fdb 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -367,6 +367,7 @@ std::string Tester<T,U>::GetOptionsString(const Arguments<U> &args) { if (o == kArgDotOffset){ result += kArgDotOffset + equals + ToString(args.dot_offset) + " "; } if (o == kArgAlpha) { result += kArgAlpha + equals + ToString(args.alpha) + " "; } if (o == kArgBeta) { result += kArgBeta + equals + ToString(args.beta) + " "; } + if (o == kArgBatchCount){result += kArgBatchCount + equals + ToString(args.batch_count) + " "; } } return result; } diff --git a/test/performance/client.cpp b/test/performance/client.cpp index 2c45b35e..bd48b047 100644 --- a/test/performance/client.cpp +++ b/test/performance/client.cpp @@ -11,13 +11,15 @@ // // ================================================================================================= -#include "test/performance/client.hpp" - #include <string> #include <vector> #include <utility> #include <algorithm> #include <chrono> +#include <random> + +#include "utilities/utilities.hpp" +#include "test/performance/client.hpp" namespace clblast { // ================================================================================================= @@ -89,6 +91,9 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le if (o == kArgAsumOffset) { args.asum_offset = GetArgument(command_line_args, help, kArgAsumOffset, size_t{0}); } if (o == kArgImaxOffset) { args.imax_offset = GetArgument(command_line_args, help, kArgImaxOffset, size_t{0}); } + // Batch arguments + if (o == kArgBatchCount) { args.batch_count = GetArgument(command_line_args, help, kArgBatchCount, size_t{1}); } + // Scalar values if (o == kArgAlpha) { args.alpha = GetArgument(command_line_args, help, kArgAlpha, GetScalar<U>()); } if (o == kArgBeta) { args.beta = GetArgument(command_line_args, help, kArgBeta, GetScalar<U>()); } @@ -179,13 +184,15 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes) std::vector<T> c_source(args.c_size); std::vector<T> ap_source(args.ap_size); std::vector<T> scalar_source(args.scalar_size); - PopulateVector(x_source, kSeed); - PopulateVector(y_source, kSeed); - PopulateVector(a_source, kSeed); - PopulateVector(b_source, kSeed); - PopulateVector(c_source, kSeed); - PopulateVector(ap_source, kSeed); - PopulateVector(scalar_source, kSeed); + std::mt19937 mt(kSeed); + std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit); + PopulateVector(x_source, mt, dist); + PopulateVector(y_source, mt, dist); + PopulateVector(a_source, mt, dist); + PopulateVector(b_source, mt, dist); + PopulateVector(c_source, mt, dist); + PopulateVector(ap_source, mt, dist); + PopulateVector(scalar_source, mt, dist); // Creates the matrices on the device auto x_vec = Buffer<T>(context, args.x_size); @@ -335,6 +342,7 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args, else if (o == kArgNrm2Offset){integers.push_back(args.nrm2_offset); } else if (o == kArgAsumOffset){integers.push_back(args.asum_offset); } else if (o == kArgImaxOffset){integers.push_back(args.imax_offset); } + else if (o == kArgBatchCount){integers.push_back(args.batch_count); } } auto strings = std::vector<std::string>{}; for (auto &o: options_) { diff --git a/test/performance/routines/levelx/xaxpybatched.cpp b/test/performance/routines/levelx/xaxpybatched.cpp new file mode 100644 index 00000000..6d3bcb51 --- /dev/null +++ b/test/performance/routines/levelx/xaxpybatched.cpp @@ -0,0 +1,37 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "test/performance/client.hpp" +#include "test/routines/levelx/xaxpybatched.hpp" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); + switch(clblast::GetPrecision(command_line_args, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: + clblast::RunClient<clblast::TestXaxpyBatched<half>, half, half>(argc, argv); break; + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXaxpyBatched<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXaxpyBatched<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: + clblast::RunClient<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: + clblast::RunClient<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv); break; + } + return 0; +} + +// ================================================================================================= diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp new file mode 100644 index 00000000..ee15ff92 --- /dev/null +++ b/test/routines/levelx/xaxpybatched.hpp @@ -0,0 +1,168 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a class with static methods to describe the XaxpyBatched routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ +#define CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ + +#include <vector> +#include <string> + +#include "utilities/utilities.hpp" + +#ifdef CLBLAST_REF_CLBLAS + #include "test/wrapper_clblas.hpp" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "test/wrapper_cblas.hpp" +#endif + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class TestXaxpyBatched { + public: + + // Although it is a non-BLAS routine, it can still be tested against level-1 routines in a loop + static size_t BLASLevel() { return 1; } + + // The list of arguments relevant for this routine + static std::vector<std::string> GetOptions() { + return {kArgN, + kArgXInc, kArgYInc, + kArgBatchCount, kArgAlpha}; + } + + // Helper for the sizes per batch + static size_t PerBatchSizeX(const Arguments<T> &args) { return args.n * args.x_inc; } + static size_t PerBatchSizeY(const Arguments<T> &args) { return args.n * args.y_inc; } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments<T> &args) { + return PerBatchSizeX(args) * args.batch_count + args.x_offset; + } + static size_t GetSizeY(const Arguments<T> &args) { + return PerBatchSizeY(args) * args.batch_count + args.y_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments<T> &args) { + args.x_size = GetSizeX(args); + args.y_size = GetSizeY(args); + + // Also sets the batch-related variables + args.x_offsets = std::vector<size_t>(args.batch_count); + args.y_offsets = std::vector<size_t>(args.batch_count); + args.alphas = std::vector<T>(args.batch_count); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + args.x_offsets[batch] = batch * PerBatchSizeX(args) + args.x_offset; + args.y_offsets[batch] = batch * PerBatchSizeY(args) + args.y_offset; + args.alphas[batch] = args.alpha + Constant<T>(batch); + } + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector<Transpose>; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = AxpyBatched(args.n, args.alphas.data(), + buffers.x_vec(), args.x_offsets.data(), args.x_inc, + buffers.y_vec(), args.y_offsets.data(), args.y_inc, + args.batch_count, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto event = cl_event{}; + auto status = clblasXaxpy(args.n, args.alphas[batch], + buffers.x_vec, args.x_offsets[batch], args.x_inc, + buffers.y_vec, args.y_offsets[batch], args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + if (static_cast<StatusCode>(status) != StatusCode::kSuccess) { + return static_cast<StatusCode>(status); + } + } + return StatusCode::kSuccess; + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); + std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0)); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + cblasXaxpy(args.n, args.alphas[batch], + x_vec_cpu, args.x_offsets[batch], args.x_inc, + y_vec_cpu, args.y_offsets[batch], args.y_inc); + } + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif + + // Describes how to download the results of the computation + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> result(args.y_size, static_cast<T>(0)); + buffers.y_vec.Read(queue, args.y_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments<T> &args) { return args.n; } + static size_t ResultID2(const Arguments<T> &args) { return args.batch_count; } + static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) { + return (id1 * args.y_inc) + args.y_offsets[id2]; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments<T> &args) { + return args.batch_count * (2 * args.n); + } + static size_t GetBytes(const Arguments<T> &args) { + return args.batch_count * (3 * args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ +#endif diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index 05bea9aa..b470dbf3 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -19,7 +19,7 @@ #include <vector> #include <string> -#include "routines/levelx/xinvert.hpp" +#include "utilities/utilities.hpp" namespace clblast { // ================================================================================================= |