diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-05-30 12:30:43 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-05-30 12:30:43 +0200 |
commit | bc5a341dfe591946e925db315fc7d8c0c25c2938 (patch) | |
tree | b216ab5eee4863e3807d92b5ddd19fa22197ed22 /test | |
parent | c7b054ea6747039f4405fd93da6e924f3e5c7f4b (diff) |
Initial commit of preview version
Diffstat (limited to 'test')
-rw-r--r-- | test/correctness/routines/xaxpy.cc | 81 | ||||
-rw-r--r-- | test/correctness/routines/xgemm.cc | 104 | ||||
-rw-r--r-- | test/correctness/routines/xsymm.cc | 104 | ||||
-rw-r--r-- | test/correctness/testabc.cc | 212 | ||||
-rw-r--r-- | test/correctness/testabc.h | 94 | ||||
-rw-r--r-- | test/correctness/tester.cc | 307 | ||||
-rw-r--r-- | test/correctness/tester.h | 132 | ||||
-rw-r--r-- | test/correctness/testxy.cc | 172 | ||||
-rw-r--r-- | test/correctness/testxy.h | 83 | ||||
-rw-r--r-- | test/performance/client.cc | 295 | ||||
-rw-r--r-- | test/performance/client.h | 85 | ||||
-rw-r--r-- | test/performance/graphs/common.r | 189 | ||||
-rw-r--r-- | test/performance/graphs/xaxpy.r | 96 | ||||
-rwxr-xr-x | test/performance/graphs/xgemm.r | 94 | ||||
-rw-r--r-- | test/performance/graphs/xsymm.r | 94 | ||||
-rw-r--r-- | test/performance/routines/xaxpy.cc | 97 | ||||
-rw-r--r-- | test/performance/routines/xgemm.cc | 115 | ||||
-rw-r--r-- | test/performance/routines/xsymm.cc | 115 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 216 |
19 files changed, 2685 insertions, 0 deletions
diff --git a/test/correctness/routines/xaxpy.cc b/test/correctness/routines/xaxpy.cc new file mode 100644 index 00000000..aa90766e --- /dev/null +++ b/test/correctness/routines/xaxpy.cc @@ -0,0 +1,81 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the tests for the Xaxpy routine. It is based on the TestXY class. +// +// ================================================================================================= + +#include "wrapper_clblas.h" +#include "correctness/testxy.h" + +namespace clblast { +// ================================================================================================= + +// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison. 
+template <typename T> +void XaxpyTest(int argc, char *argv[], const bool silent, const std::string &name) { + + // Creates the CLBlast lambda + auto clblast_lambda = [](const Arguments<T> &args, + const Buffer &x_vec, const Buffer &y_vec, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + return Axpy(args.n, args.alpha, + x_vec(), args.x_offset, args.x_inc, + y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [](const Arguments<T> &args, + const Buffer &x_vec, const Buffer &y_vec, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXaxpy(args.n, args.alpha, + x_vec(), args.x_offset, args.x_inc, + y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + return static_cast<StatusCode>(status); + }; + + // Selects the platform and device on which to test (command-line options) + auto help = std::string{"Options given/available:\n"}; + const auto platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0}); + const auto device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0}); + if (!silent) { fprintf(stdout, "\n* %s\n", help.c_str()); } + + // Initializes the other arguments relevant for this routine + auto args = Arguments<T>{}; + const auto options = std::vector<std::string>{kArgN, kArgXInc, kArgYInc, + kArgXOffset, kArgYOffset, kArgAlpha}; + + // Creates a tester + TestXY<T> tester{platform_id, device_id, name, options, clblast_lambda, clblas_lambda}; + + // Runs the tests + const auto case_name = "default"; + tester.TestRegular(args, case_name); + tester.TestInvalidBufferSizes(args, case_name); +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + 
clblast::XaxpyTest<float>(argc, argv, false, "SAXPY"); + clblast::XaxpyTest<double>(argc, argv, true, "DAXPY"); + clblast::XaxpyTest<clblast::float2>(argc, argv, true, "CAXPY"); + clblast::XaxpyTest<clblast::double2>(argc, argv, true, "ZAXPY"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/routines/xgemm.cc b/test/correctness/routines/xgemm.cc new file mode 100644 index 00000000..04525cc5 --- /dev/null +++ b/test/correctness/routines/xgemm.cc @@ -0,0 +1,104 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the tests for the Xgemm routine. It is based on the TestABC class. +// +// ================================================================================================= + +#include "wrapper_clblas.h" +#include "correctness/testabc.h" + +namespace clblast { +// ================================================================================================= + +// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison. 
+template <typename T> +void XgemmTest(int argc, char *argv[], const bool silent, const std::string &name) { + + // Creates the CLBlast lambda + auto clblast_lambda = [](const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + return Gemm(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [](const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgemm(static_cast<clblasOrder>(args.layout), + static_cast<clblasTranspose>(args.a_transpose), + static_cast<clblasTranspose>(args.b_transpose), + args.m, args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + return static_cast<StatusCode>(status); + }; + + // Selects the platform and device on which to test (command-line options) + auto help = std::string{"Options given/available:\n"}; + const auto platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0}); + const auto device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0}); + if (!silent) { fprintf(stdout, "\n* %s\n", help.c_str()); } + + // Initializes the other arguments relevant for this routine + auto args = Arguments<T>{}; + const auto options = std::vector<std::string>{kArgM, kArgN, kArgK, kArgLayout, + kArgATransp, kArgBTransp, + kArgALeadDim, kArgBLeadDim, kArgCLeadDim, + kArgAOffset, kArgBOffset, kArgCOffset}; + + // Creates a tester + TestABC<T> 
tester{platform_id, device_id, name, options, clblast_lambda, clblas_lambda}; + + // Loops over the test-cases from a data-layout point of view + for (auto &layout: {Layout::kRowMajor, Layout::kColMajor}) { + args.layout = layout; + for (auto &a_transpose: {Transpose::kNo, Transpose::kYes}) { + args.a_transpose = a_transpose; + for (auto &b_transpose: {Transpose::kNo, Transpose::kYes}) { + args.b_transpose = b_transpose; + const auto case_name = ToString(layout)+" "+ToString(a_transpose)+" "+ToString(b_transpose); + + // Runs the tests + tester.TestRegular(args, case_name); + tester.TestInvalidBufferSizes(args, case_name); + } + } + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::XgemmTest<float>(argc, argv, false, "SGEMM"); + clblast::XgemmTest<double>(argc, argv, true, "DGEMM"); + //clblast::XgemmTest<float2>(argc, argv, true, "CGEMM"); + //clblast::XgemmTest<double2>(argc, argv, true, "ZGEMM"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/routines/xsymm.cc b/test/correctness/routines/xsymm.cc new file mode 100644 index 00000000..9bcad253 --- /dev/null +++ b/test/correctness/routines/xsymm.cc @@ -0,0 +1,104 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the tests for the Xsymm routine. It is based on the TestABC class. 
+// +// ================================================================================================= + +#include "wrapper_clblas.h" +#include "correctness/testabc.h" + +namespace clblast { +// ================================================================================================= + +// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison. +template <typename T> +void XsymmTest(int argc, char *argv[], const bool silent, const std::string &name) { + + // Creates the CLBlast lambda + auto clblast_lambda = [](const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + return Symm(args.layout, args.side, args.triangle, + args.m, args.n, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [](const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) -> StatusCode { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsymm(static_cast<clblasOrder>(args.layout), + static_cast<clblasSide>(args.side), + static_cast<clblasUplo>(args.triangle), + args.m, args.n, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + return static_cast<StatusCode>(status); + }; + + // Selects the platform and device on which to test (command-line options) + auto help = std::string{"Options given/available:\n"}; + const auto platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0}); + const auto device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0}); + if (!silent) { fprintf(stdout, "\n* %s\n", 
help.c_str()); } + + // Initializes the other arguments relevant for this routine + auto args = Arguments<T>{}; + const auto options = std::vector<std::string>{kArgM, kArgN, kArgLayout, + kArgSide, kArgTriangle, + kArgALeadDim, kArgBLeadDim, kArgCLeadDim, + kArgAOffset, kArgBOffset, kArgCOffset}; + + // Creates a tester + TestABC<T> tester{platform_id, device_id, name, options, clblast_lambda, clblas_lambda}; + + // Loops over the test-cases from a data-layout point of view + for (auto &layout: {Layout::kRowMajor, Layout::kColMajor}) { + args.layout = layout; + for (auto &side: {Side::kLeft, Side::kRight}) { + args.side = side; + for (auto &triangle: {Triangle::kUpper, Triangle::kLower}) { + args.triangle = triangle; + const auto case_name = ToString(layout)+" "+ToString(side)+" "+ToString(triangle); + + // Runs the tests + tester.TestRegular(args, case_name); + tester.TestInvalidBufferSizes(args, case_name); + } + } + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::XsymmTest<float>(argc, argv, false, "SSYMM"); + clblast::XsymmTest<double>(argc, argv, true, "DSYMM"); + //clblast::XsymmTest<float2>(argc, argv, true, "CSYMM"); + //clblast::XsymmTest<double2>(argc, argv, true, "ZSYMM"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/testabc.cc b/test/correctness/testabc.cc new file mode 100644 index 00000000..5d5869c8 --- /dev/null +++ b/test/correctness/testabc.cc @@ -0,0 +1,212 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the TestABC class (see the header for information about the class). +// +// ================================================================================================= + +#include <algorithm> + +#include "correctness/testabc.h" + +namespace clblast { +// ================================================================================================= + +// Constructor, initializes the base class tester and input data +template <typename T> +TestABC<T>::TestABC(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options, + const Routine clblast_lambda, const Routine clblas_lambda): + Tester<T>{platform_id, device_id, name, options}, + clblast_lambda_(clblast_lambda), + clblas_lambda_(clblas_lambda) { + + // Computes the maximum sizes. This allows for a single set of input/output buffers. + auto max_dim = *std::max_element(kMatrixDims.begin(), kMatrixDims.end()); + auto max_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end()); + auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end()); + + // Creates test input data + a_source_.resize(max_dim*max_ld + max_offset); + b_source_.resize(max_dim*max_ld + max_offset); + c_source_.resize(max_dim*max_ld + max_offset); + PopulateVector(a_source_); + PopulateVector(b_source_); + PopulateVector(c_source_); +} + +// =============================================================================================== + +// Tests the routine for a wide variety of parameters +template <typename T> +void TestABC<T>::TestRegular(Arguments<T> &args, const std::string &name) { + TestStart("regular behaviour", name); + + // Computes whether or not the matrices are transposed. 
Note that we assume a default of + // column-major and no-transpose. If one of them is different (but not both), then rotated + // is considered true. + auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) || + (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo); + auto b_rotated = (args.layout == Layout::kColMajor && args.b_transpose != Transpose::kNo) || + (args.layout == Layout::kRowMajor && args.b_transpose == Transpose::kNo); + auto c_rotated = (args.layout == Layout::kRowMajor); + + // Iterates over the matrix dimensions + for (auto &m: kMatrixDims) { + args.m = m; + for (auto &n: kMatrixDims) { + args.n = n; + for (auto &k: kMatrixDims) { + args.k = k; + + // Computes the second dimensions of the matrices taking the rotation into account + auto a_two = (a_rotated) ? m : k; + auto b_two = (b_rotated) ? k : n; + auto c_two = (c_rotated) ? m : n; + + // Iterates over the leading-dimension values and the offsets + for (auto &a_ld: kMatrixDims) { + args.a_ld = a_ld; + for (auto &a_offset: kOffsets) { + args.a_offset = a_offset; + for (auto &b_ld: kMatrixDims) { + args.b_ld = b_ld; + for (auto &b_offset: kOffsets) { + args.b_offset = b_offset; + for (auto &c_ld: kMatrixDims) { + args.c_ld = c_ld; + for (auto &c_offset: kOffsets) { + args.c_offset = c_offset; + + // Computes the buffer sizes + auto a_size = a_two * a_ld + a_offset; + auto b_size = b_two * b_ld + b_offset; + auto c_size = c_two * c_ld + c_offset; + if (a_size < 1 || b_size < 1 || c_size < 1) { continue; } + + // Creates the OpenCL buffers + auto a_mat = Buffer(context_, CL_MEM_READ_WRITE, a_size*sizeof(T)); + auto b_mat = Buffer(context_, CL_MEM_READ_WRITE, b_size*sizeof(T)); + auto r_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T)); + auto s_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T)); + + // Iterates over the values for alpha and beta + for (auto &alpha: kAlphaValues) { + args.alpha = alpha; + for (auto &beta: 
kBetaValues) { + args.beta = beta; + + // Runs the reference clBLAS code + a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_); + b_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_); + r_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_); + auto status1 = clblas_lambda_(args, a_mat, b_mat, r_mat, queue_); + + // Runs the CLBlast code + a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_); + b_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_); + s_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_); + auto status2 = clblast_lambda_(args, a_mat, b_mat, s_mat, queue_); + + // Tests for equality of the two status codes + if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) { + TestErrorCodes(status1, status2, args); + continue; + } + + // Downloads the results + std::vector<T> r_result(c_size, static_cast<T>(0)); + std::vector<T> s_result(c_size, static_cast<T>(0)); + r_mat.ReadBuffer(queue_, c_size*sizeof(T), r_result); + s_mat.ReadBuffer(queue_, c_size*sizeof(T), s_result); + + // Checks for differences in the output + auto errors = size_t{0}; + for (auto idm=size_t{0}; idm<m; ++idm) { + for (auto idn=size_t{0}; idn<n; ++idn) { + auto index = (args.layout == Layout::kRowMajor) ? + idm*args.c_ld + idn + args.c_offset: + idn*args.c_ld + idm + args.c_offset; + if (!TestSimilarity(r_result[index], s_result[index], kErrorMargin)) { + errors++; + } + } + } + + // Tests the error count (should be zero) + TestErrorCount(errors, m*n, args); + } + } + } + } + } + } + } + } + } + } + } + TestEnd(); +} + +// ================================================================================================= + +// Tests the routine for cases with invalid OpenCL memory buffer sizes. Tests only on return-types, +// does not test for results (if any). 
+template <typename T> +void TestABC<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) { + TestStart("invalid buffer sizes", name); + + // Sets example test parameters + args.m = kBufferSize; + args.n = kBufferSize; + args.k = kBufferSize; + args.a_ld = kBufferSize; + args.b_ld = kBufferSize; + args.c_ld = kBufferSize; + + // Iterates over test buffer sizes + const std::vector<size_t> kBufferSizes = {0, kBufferSize - 1, kBufferSize}; + for (auto &a_size: kBufferSizes) { + for (auto &b_size: kBufferSizes) { + for (auto &c_size: kBufferSizes) { + + // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly + // want to be able to create invalid buffers (no error checking here). + auto a = clCreateBuffer(context_(), CL_MEM_READ_WRITE, a_size*sizeof(T), nullptr, nullptr); + auto a_mat = Buffer(a); + auto b = clCreateBuffer(context_(), CL_MEM_READ_WRITE, b_size*sizeof(T), nullptr, nullptr); + auto b_mat = Buffer(b); + auto r = clCreateBuffer(context_(), CL_MEM_READ_WRITE, c_size*sizeof(T), nullptr, nullptr); + auto r_mat = Buffer(r); + auto s = clCreateBuffer(context_(), CL_MEM_READ_WRITE, c_size*sizeof(T), nullptr, nullptr); + auto s_mat = Buffer(s); + + // Runs the two routines + auto status1 = clblas_lambda_(args, a_mat, b_mat, r_mat, queue_); + auto status2 = clblast_lambda_(args, a_mat, b_mat, s_mat, queue_); + + // Tests for equality of the two status codes + TestErrorCodes(status1, status2, args); + } + } + } + TestEnd(); +} + +// ================================================================================================= + +// Compiles the templated class +template class TestABC<float>; +template class TestABC<double>; +template class TestABC<float2>; +template class TestABC<double2>; + +// ================================================================================================= +} // namespace clblast diff --git a/test/correctness/testabc.h b/test/correctness/testabc.h new file mode 100644 index 
00000000..bb06ea22 --- /dev/null +++ b/test/correctness/testabc.h @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file tests any mat-mat-mat (A,B,C) routine. It contains two types of tests: one testing +// all sorts of input combinations, and one deliberatly testing with invalid values. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_CORRECTNESS_TESTABC_H_ +#define CLBLAST_TEST_CORRECTNESS_TESTABC_H_ + +#include <vector> +#include <string> + +#include "correctness/tester.h" + +namespace clblast { +// ================================================================================================= + +// Defines the parameters that delineate individual test-cases +struct Parameters { + Layout layout; + Transpose a_transpose; + Transpose b_transpose; + std::string GetString() const { + return "Layout: "+ToString(layout)+", A: "+ToString(a_transpose)+ + ", B: "+ToString(b_transpose); + } +}; + +// See comment at top of file for a description of the class +template <typename T> +class TestABC: public Tester<T> { + public: + + // Uses several variables from the Tester class + using Tester<T>::context_; + using Tester<T>::queue_; + using Tester<T>::kErrorMargin; + + // Uses several helper functions from the Tester class + using Tester<T>::TestStart; + using Tester<T>::TestEnd; + using Tester<T>::TestSimilarity; + using Tester<T>::TestErrorCount; + using Tester<T>::TestErrorCodes; + using Tester<T>::GetExampleScalars; + + // Test settings for the regular test. Append to this list in case more tests are required. 
+ const std::vector<size_t> kMatrixDims = { 7, 64 }; + const std::vector<size_t> kOffsets = { 0 }; + const std::vector<T> kAlphaValues = GetExampleScalars(); + const std::vector<T> kBetaValues = GetExampleScalars(); + + // Test settings for the invalid test + const size_t kBufferSize = 64; + + // Shorthand for a BLAS routine + using Routine = std::function<StatusCode(const Arguments<T>&, + const Buffer&, const Buffer&, const Buffer&, + CommandQueue&)>; + + // Constructor, initializes the base class tester and input data + TestABC(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options, + const Routine clblast_lambda, const Routine clblas_lambda); + + // The test functions, taking no inputs + void TestRegular(Arguments<T> &args, const std::string &name); + void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name); + + private: + + // Source data to test with + std::vector<T> a_source_; + std::vector<T> b_source_; + std::vector<T> c_source_; + + // The routines to test + Routine clblast_lambda_; + Routine clblas_lambda_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_CORRECTNESS_TESTABC_H_ +#endif diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc new file mode 100644 index 00000000..da1cb152 --- /dev/null +++ b/test/correctness/tester.cc @@ -0,0 +1,307 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Tester class (see the header for information about the class). 
+// +// ================================================================================================= + +#include "correctness/tester.h" + +#include <string> +#include <vector> +#include <iostream> +#include <cmath> +#include <limits> + +namespace clblast { +// ================================================================================================= + +// General constructor for all CLBlast testers. It prints out the test header to stdout and sets-up +// the clBLAS library for reference. +template <typename T> +Tester<T>::Tester(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options): + platform_(Platform(platform_id)), + device_(Device(platform_, kDeviceType, device_id)), + context_(Context(device_)), + queue_(CommandQueue(context_, device_)), + error_log_{}, + num_passed_{0}, + num_skipped_{0}, + num_errors_{0}, + print_count_{0}, + tests_failed_{0}, + tests_passed_{0}, + options_{options} { + + // Prints the header + fprintf(stdout, "* Running on OpenCL device '%s'.\n", device_.Name().c_str()); + fprintf(stdout, "* Starting tests for the %s'%s'%s routine. 
Legend:\n", + kPrintMessage.c_str(), name.c_str(), kPrintEnd.c_str()); + fprintf(stdout, " %s -> Test produced correct results\n", kSuccessData.c_str()); + fprintf(stdout, " %s -> Test returned the correct error code\n", kSuccessStatus.c_str()); + fprintf(stdout, " %s -> Test produced incorrect results\n", kErrorData.c_str()); + fprintf(stdout, " %s -> Test returned an incorrect error code\n", kErrorStatus.c_str()); + fprintf(stdout, " %s -> Test not executed: OpenCL-kernel compilation error\n", + kSkippedCompilation.c_str()); + fprintf(stdout, " %s -> Test not executed: Unsupported precision\n", + kUnsupportedPrecision.c_str()); + + // Initializes clBLAS + auto status = clblasSetup(); + if (status != CL_SUCCESS) { + throw std::runtime_error("clBLAS setup error: "+ToString(static_cast<int>(status))); + } +} + +// Destructor prints the summary of the test cases and cleans-up the clBLAS library +template <typename T> +Tester<T>::~Tester() { + fprintf(stdout, "* Completed all test-cases for this routine. Results:\n"); + fprintf(stdout, " %lu test(s) succeeded\n", tests_passed_); + if (tests_failed_ != 0) { + fprintf(stdout, " %s%lu test(s) failed%s\n", + kPrintError.c_str(), tests_failed_, kPrintEnd.c_str()); + } + else { + fprintf(stdout, " %lu test(s) failed\n", tests_failed_); + } + fprintf(stdout, "\n"); + clblasTeardown(); +} + +// ================================================================================================= + +// Function called at the start of each test. This prints a header with information about the +// test and re-initializes all test data-structures. 
+template <typename T> +void Tester<T>::TestStart(const std::string &test_name, const std::string &test_configuration) { + + // Prints the header + fprintf(stdout, "* Testing %s'%s'%s for %s'%s'%s:\n", + kPrintMessage.c_str(), test_name.c_str(), kPrintEnd.c_str(), + kPrintMessage.c_str(), test_configuration.c_str(), kPrintEnd.c_str()); + fprintf(stdout, " "); + + // Empties the error log and the error/pass counters + error_log_.clear(); + num_passed_ = 0; + num_skipped_ = 0; + num_errors_ = 0; + print_count_ = 0; +} + +// Function called at the end of each test. This prints errors if any occured. It also prints a +// summary of the number of sub-tests passed/failed. +template <typename T> +void Tester<T>::TestEnd() { + fprintf(stdout, "\n"); + if (error_log_.size() == 0) { tests_passed_++; } else { tests_failed_++; } + + // Prints details of all error occurences for these tests + for (auto &entry: error_log_) { + if (entry.error_percentage != kStatusError) { + fprintf(stdout, " Error rate %.1lf%%: ", entry.error_percentage); + } + else { + fprintf(stdout, " Status code %d (expected %d): ", entry.status_found, entry.status_expect); + } + for (auto &o: options_) { + if (o == kArgM) { fprintf(stdout, "%s=%lu ", kArgM, entry.args.m); } + if (o == kArgN) { fprintf(stdout, "%s=%lu ", kArgN, entry.args.n); } + if (o == kArgK) { fprintf(stdout, "%s=%lu ", kArgK, entry.args.k); } + if (o == kArgLayout) { fprintf(stdout, "%s=%d ", kArgLayout, entry.args.layout);} + if (o == kArgATransp) { fprintf(stdout, "%s=%d ", kArgATransp, entry.args.a_transpose);} + if (o == kArgBTransp) { fprintf(stdout, "%s=%d ", kArgBTransp, entry.args.b_transpose);} + if (o == kArgSide) { fprintf(stdout, "%s=%d ", kArgSide, entry.args.side);} + if (o == kArgTriangle) { fprintf(stdout, "%s=%d ", kArgTriangle, entry.args.triangle);} + if (o == kArgXInc) { fprintf(stdout, "%s=%lu ", kArgXInc, entry.args.x_inc);} + if (o == kArgYInc) { fprintf(stdout, "%s=%lu ", kArgYInc, entry.args.y_inc);} + if (o == 
kArgXOffset) { fprintf(stdout, "%s=%lu ", kArgXOffset, entry.args.x_offset);} + if (o == kArgYOffset) { fprintf(stdout, "%s=%lu ", kArgYOffset, entry.args.y_offset);} + if (o == kArgALeadDim) { fprintf(stdout, "%s=%lu ", kArgALeadDim, entry.args.a_ld);} + if (o == kArgBLeadDim) { fprintf(stdout, "%s=%lu ", kArgBLeadDim, entry.args.b_ld);} + if (o == kArgCLeadDim) { fprintf(stdout, "%s=%lu ", kArgCLeadDim, entry.args.c_ld);} + if (o == kArgAOffset) { fprintf(stdout, "%s=%lu ", kArgAOffset, entry.args.a_offset);} + if (o == kArgBOffset) { fprintf(stdout, "%s=%lu ", kArgBOffset, entry.args.b_offset);} + if (o == kArgCOffset) { fprintf(stdout, "%s=%lu ", kArgCOffset, entry.args.c_offset);} + } + fprintf(stdout, "\n"); + } + + // Prints a test summary + auto pass_rate = 100*num_passed_ / static_cast<float>(num_passed_ + num_skipped_ + num_errors_); + fprintf(stdout, " Pass rate %s%5.1lf%%%s:", kPrintMessage.c_str(), pass_rate, kPrintEnd.c_str()); + fprintf(stdout, " %lu passed /", num_passed_); + if (num_skipped_ != 0) { + fprintf(stdout, " %s%lu skipped%s /", kPrintWarning.c_str(), num_skipped_, kPrintEnd.c_str()); + } + else { + fprintf(stdout, " %lu skipped /", num_skipped_); + } + if (num_errors_ != 0) { + fprintf(stdout, " %s%lu failed%s\n", kPrintError.c_str(), num_errors_, kPrintEnd.c_str()); + } + else { + fprintf(stdout, " %lu failed\n", num_errors_); + } +} + +// ================================================================================================= + +// Compares two floating point values and returns whether they are within an acceptable error +// margin. This replaces GTest's EXPECT_NEAR(). 
+template <typename T> +bool Tester<T>::TestSimilarity(const T val1, const T val2, const double margin) { + const auto difference = std::fabs(val1 - val2); + + // Shortcut, handles infinities + if (val1 == val2) { + return true; + } + // The values are zero or both are extremely close to it relative error is less meaningful + else if (val1 == 0 || val2 == 0 || difference < std::numeric_limits<T>::min()) { + return difference < (static_cast<T>(margin) * std::numeric_limits<T>::min()); + } + // Use relative error + else { + return (difference / (std::fabs(val1) + std::fabs(val2))) < static_cast<T>(margin); + } +} + +// Specialisations for complex data-types +template <> +bool Tester<float2>::TestSimilarity(const float2 val1, const float2 val2, const double margin) { + auto real = Tester<float>::TestSimilarity(val1.real(), val2.real(), margin); + auto imag = Tester<float>::TestSimilarity(val1.imag(), val2.imag(), margin); + return (real && imag); +} +template <> +bool Tester<double2>::TestSimilarity(const double2 val1, const double2 val2, const double margin) { + auto real = Tester<double>::TestSimilarity(val1.real(), val2.real(), margin); + auto imag = Tester<double>::TestSimilarity(val1.imag(), val2.imag(), margin); + return (real && imag); +} + +// ================================================================================================= + +// Handles a 'pass' or 'error' depending on whether there are any errors +template <typename T> +void Tester<T>::TestErrorCount(const size_t errors, const size_t size, const Arguments<T> &args) { + + // Finished successfully + if (errors == 0) { + PrintTestResult(kSuccessData); + ReportPass(); + } + + // Error(s) occurred + else { + auto percentage = 100*errors / static_cast<float>(size); + PrintTestResult(kErrorData); + ReportError({StatusCode::kSuccess, StatusCode::kSuccess, percentage, args}); + } +} + +// Compares two status codes for equality. 
The outcome can be a pass (they are the same), a warning +// (CLBlast reported a compilation error), or an error (they are different). +template <typename T> +void Tester<T>::TestErrorCodes(const StatusCode clblas_status, const StatusCode clblast_status, + const Arguments<T> &args) { + + // Finished successfully + if (clblas_status == clblast_status) { + PrintTestResult(kSuccessStatus); + ReportPass(); + } + + // No support for this kind of precision + else if (clblast_status == StatusCode::kNoDoublePrecision || + clblast_status == StatusCode::kNoHalfPrecision) { + PrintTestResult(kUnsupportedPrecision); + ReportSkipped(); + } + + // Could not compile the CLBlast kernel properly + else if (clblast_status == StatusCode::kBuildProgramFailure || + clblast_status == StatusCode::kNotImplemented) { + PrintTestResult(kSkippedCompilation); + ReportSkipped(); + } + + // Error occurred + else { + PrintTestResult(kErrorStatus); + ReportError({clblas_status, clblast_status, kStatusError, args}); + } +} + +// ================================================================================================= + +// Retrieves a list of example scalar values, used for the alpha and beta arguments for the various +// routines. This function is specialised for the different data-types. 
+template <> +const std::vector<float> Tester<float>::GetExampleScalars() { + return {0.0f, 1.0f, 3.14f}; +} +template <> +const std::vector<double> Tester<double>::GetExampleScalars() { + return {0.0, 1.0, 3.14}; +} +template <> +const std::vector<float2> Tester<float2>::GetExampleScalars() { + return {{0.0f, 0.0f}, {1.0f, 1.3f}, {2.42f, 3.14f}}; +} +template <> +const std::vector<double2> Tester<double2>::GetExampleScalars() { + return {{0.0, 0.0}, {1.0, 1.3}, {2.42, 3.14}}; +} + +// ================================================================================================= + +// A test can either pass, be skipped, or fail +template <typename T> +void Tester<T>::ReportPass() { + num_passed_++; +} +template <typename T> +void Tester<T>::ReportSkipped() { + num_skipped_++; +} +template <typename T> +void Tester<T>::ReportError(const ErrorLogEntry &error_log_entry) { + error_log_.push_back(error_log_entry); + num_errors_++; +} + +// ================================================================================================= + +// Prints the test-result symbol to screen. This function limits the maximum number of symbols per +// line by printing newlines once every so many calls. 
+template <typename T> +void Tester<T>::PrintTestResult(const std::string &message) { + if (print_count_ == kResultsPerLine) { + print_count_ = 0; + fprintf(stdout, "\n "); + } + fprintf(stdout, "%s", message.c_str()); + std::cout << std::flush; + print_count_++; +} + +// ================================================================================================= + +// Compiles the templated class +template class Tester<float>; +template class Tester<double>; +template class Tester<float2>; +template class Tester<double2>; + +// ================================================================================================= +} // namespace clblast diff --git a/test/correctness/tester.h b/test/correctness/tester.h new file mode 100644 index 00000000..12f6125a --- /dev/null +++ b/test/correctness/tester.h @@ -0,0 +1,132 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Tester class, providing a test-framework. GTest was used before, but +// was not able to handle certain cases (e.g. template type + parameters). This is its (basic) +// custom replacement. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_CORRECTNESS_TESTER_H_ +#define CLBLAST_TEST_CORRECTNESS_TESTER_H_ + +#include <string> +#include <vector> +#include <memory> + +// The libraries +#include <clBLAS.h> +#include "clblast.h" + +#include "internal/utilities.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Tester { + public: + + // Types of devices to consider + const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; + + // Maximum number of test results printed on a single line + static constexpr auto kResultsPerLine = size_t{64}; + + // Error percentage is not applicable: error was caused by an incorrect status + static constexpr auto kStatusError = -1.0f; + + // Set the allowed error margin for floating-point comparisons + static constexpr auto kErrorMargin = 1.0e-2; + + // Constants holding start and end strings for terminal-output in colour + const std::string kPrintError{"\x1b[31m"}; + const std::string kPrintSuccess{"\x1b[32m"}; + const std::string kPrintWarning{"\x1b[35m"}; + const std::string kPrintMessage{"\x1b[1m"}; + const std::string kPrintEnd{"\x1b[0m"}; + + // Sets the output error coding + const std::string kSuccessData{kPrintSuccess + ":" + kPrintEnd}; + const std::string kSuccessStatus{kPrintSuccess + "." 
+ kPrintEnd}; + const std::string kErrorData{kPrintError + "X" + kPrintEnd}; + const std::string kErrorStatus{kPrintError + "/" + kPrintEnd}; + const std::string kSkippedCompilation{kPrintWarning + "\\" + kPrintEnd}; + const std::string kUnsupportedPrecision{kPrintWarning + "o" + kPrintEnd}; + + // This structure combines the above log-entry with a status code an error percentage + struct ErrorLogEntry { + StatusCode status_expect; + StatusCode status_found; + float error_percentage; + Arguments<T> args; + }; + + // Creates an instance of the tester, running on a particular OpenCL platform and device. It + // takes the routine's names as an additional parameter. + explicit Tester(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options); + ~Tester(); + + // These methods start and end a test-case. Within a test-case, multiple tests can be run. + void TestStart(const std::string &test_name, const std::string &test_configuration); + void TestEnd(); + + // Compares two floating point values for similarity. Allows for a certain relative error margin. 
+ static bool TestSimilarity(const T val1, const T val2, const double margin); + + // Tests either an error count (should be zero) or two error codes (must match) + void TestErrorCount(const size_t errors, const size_t size, const Arguments<T> &args); + void TestErrorCodes(const StatusCode clblas_status, const StatusCode clblast_status, + const Arguments<T> &args); + + protected: + + // Retrieves a list of example scalars of the right type + const std::vector<T> GetExampleScalars(); + + // The OpenCL objects (accessible by derived classes) + Platform platform_; + Device device_; + Context context_; + CommandQueue queue_; + + private: + + // Internal methods to report a passed, skipped, or failed test + void ReportPass(); + void ReportSkipped(); + void ReportError(const ErrorLogEntry &log_entry); + + // Prints the error or success symbol to screen + void PrintTestResult(const std::string &message); + + // Logging and counting occurrences of errors + std::vector<ErrorLogEntry> error_log_; + size_t num_passed_; + size_t num_skipped_; + size_t num_errors_; + + // Counting the amount of errors printed on this row + size_t print_count_; + + // Counting the number of test-cases with and without failures + size_t tests_failed_; + size_t tests_passed_; + + // Arguments relevant for a specific routine + std::vector<std::string> options_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_CORRECTNESS_TESTER_H_ +#endif diff --git a/test/correctness/testxy.cc b/test/correctness/testxy.cc new file mode 100644 index 00000000..0b708b3d --- /dev/null +++ b/test/correctness/testxy.cc @@ -0,0 +1,172 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the TestXY class (see the header for information about the class). +// +// ================================================================================================= + +#include <algorithm> + +#include "correctness/testxy.h" + +namespace clblast { +// ================================================================================================= + +// Constructor, initializes the base class tester and input data +template <typename T> +TestXY<T>::TestXY(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options, + const Routine clblast_lambda, const Routine clblas_lambda): + Tester<T>{platform_id, device_id, name, options}, + clblast_lambda_(clblast_lambda), + clblas_lambda_(clblas_lambda) { + + // Computes the maximum sizes. This allows for a single set of input/output buffers. 
+ auto max_dim = *std::max_element(kVectorDims.begin(), kVectorDims.end()); + auto max_inc = *std::max_element(kIncrements.begin(), kIncrements.end()); + auto max_offset = *std::max_element(kOffsets.begin(), kOffsets.end()); + + // Creates test input data + x_source_.resize(max_dim*max_inc + max_offset); + y_source_.resize(max_dim*max_inc + max_offset); + PopulateVector(x_source_); + PopulateVector(y_source_); +} + +// =============================================================================================== + +// Tests the routine for a wide variety of parameters +template <typename T> +void TestXY<T>::TestRegular(Arguments<T> &args, const std::string &name) { + TestStart("regular behaviour", name); + + // Iterates over the vector dimension + for (auto &n: kVectorDims) { + args.n = n; + + // Iterates over the increment-values and the offsets + for (auto &x_inc: kIncrements) { + args.x_inc = x_inc; + for (auto &x_offset: kOffsets) { + args.x_offset = x_offset; + for (auto &y_inc: kIncrements) { + args.y_inc = y_inc; + for (auto &y_offset: kOffsets) { + args.y_offset = y_offset; + + // Computes the buffer sizes + auto x_size = n * x_inc + x_offset; + auto y_size = n * y_inc + y_offset; + if (x_size < 1 || y_size < 1) { continue; } + + // Creates the OpenCL buffers + auto x_vec = Buffer(context_, CL_MEM_READ_WRITE, x_size*sizeof(T)); + auto r_vec = Buffer(context_, CL_MEM_READ_WRITE, y_size*sizeof(T)); + auto s_vec = Buffer(context_, CL_MEM_READ_WRITE, y_size*sizeof(T)); + + // Iterates over the values for alpha + for (auto &alpha: kAlphaValues) { + args.alpha = alpha; + + // Runs the reference clBLAS code + x_vec.WriteBuffer(queue_, x_size*sizeof(T), x_source_); + r_vec.WriteBuffer(queue_, y_size*sizeof(T), y_source_); + auto status1 = clblas_lambda_(args, x_vec, r_vec, queue_); + + // Runs the CLBlast code + x_vec.WriteBuffer(queue_, x_size*sizeof(T), x_source_); + s_vec.WriteBuffer(queue_, y_size*sizeof(T), y_source_); + auto status2 = clblast_lambda_(args, 
x_vec, s_vec, queue_); + + // Tests for equality of the two status codes + if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) { + TestErrorCodes(status1, status2, args); + continue; + } + + // Downloads the results + std::vector<T> r_result(y_size, static_cast<T>(0)); + std::vector<T> s_result(y_size, static_cast<T>(0)); + r_vec.ReadBuffer(queue_, y_size*sizeof(T), r_result); + s_vec.ReadBuffer(queue_, y_size*sizeof(T), s_result); + + // Checks for differences in the output + auto errors = size_t{0}; + for (auto idn=size_t{0}; idn<n; ++idn) { + auto index = idn*y_inc + y_offset; + if (!TestSimilarity(r_result[index], s_result[index], kErrorMargin)) { + errors++; + } + } + + // Tests the error count (should be zero) + TestErrorCount(errors, n, args); + } + } + } + } + } + } + TestEnd(); +} + +// ================================================================================================= + +// Tests the routine for cases with invalid OpenCL memory buffer sizes. Tests only on return-types, +// does not test for results (if any). +template <typename T> +void TestXY<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) { + TestStart("invalid buffer sizes", name); + + // Sets example test parameters + args.n = kBufferSize; + + // Iterates over test buffer sizes + const std::vector<size_t> kBufferSizes = {0, kBufferSize - 1, kBufferSize}; + for (auto &x_size: kBufferSizes) { + for (auto &y_size: kBufferSizes) { + + // Iterates over test increments + for (auto &x_inc: kInvalidIncrements) { + args.x_inc = x_inc; + for (auto &y_inc: kInvalidIncrements) { + args.y_inc = y_inc; + + // Creates the OpenCL buffers. Note: we are not using the C++ version since we explicitly + // want to be able to create invalid buffers (no error checking here). 
+ auto x = clCreateBuffer(context_(), CL_MEM_READ_WRITE, x_size*sizeof(T), nullptr, nullptr); + auto x_vec = Buffer(x); + auto r = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_size*sizeof(T), nullptr, nullptr); + auto r_vec = Buffer(r); + auto s = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_size*sizeof(T), nullptr, nullptr); + auto s_vec = Buffer(s); + + // Runs the two routines + auto status1 = clblas_lambda_(args, x_vec, r_vec, queue_); + auto status2 = clblast_lambda_(args, x_vec, s_vec, queue_); + + // Tests for equality of the two status codes + TestErrorCodes(status1, status2, args); + } + } + } + } + TestEnd(); +} + +// ================================================================================================= + +// Compiles the templated class +template class TestXY<float>; +template class TestXY<double>; +template class TestXY<float2>; +template class TestXY<double2>; + +// ================================================================================================= +} // namespace clblast diff --git a/test/correctness/testxy.h b/test/correctness/testxy.h new file mode 100644 index 00000000..32cd91fa --- /dev/null +++ b/test/correctness/testxy.h @@ -0,0 +1,83 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file tests any vector-vector (X,Y) routine. It contains two types of tests: one testing +// all sorts of input combinations, and one deliberatly testing with invalid values. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_CORRECTNESS_TESTXY_H_ +#define CLBLAST_TEST_CORRECTNESS_TESTXY_H_ + +#include <vector> +#include <string> + +#include "correctness/tester.h" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class TestXY: public Tester<T> { + public: + + // Uses several variables from the Tester class + using Tester<T>::context_; + using Tester<T>::queue_; + using Tester<T>::kErrorMargin; + + // Uses several helper functions from the Tester class + using Tester<T>::TestStart; + using Tester<T>::TestEnd; + using Tester<T>::TestSimilarity; + using Tester<T>::TestErrorCount; + using Tester<T>::TestErrorCodes; + using Tester<T>::GetExampleScalars; + + // Test settings for the regular test. Append to this list in case more tests are required. 
+ const std::vector<size_t> kVectorDims = { 7, 93, 4096 }; + const std::vector<size_t> kOffsets = { 0, 10 }; + const std::vector<size_t> kIncrements = { 1, 2 }; + const std::vector<T> kAlphaValues = GetExampleScalars(); + + // Test settings for the invalid test + const std::vector<size_t> kInvalidIncrements = { 0, 1 }; + const size_t kBufferSize = 512; + + // Shorthand for a BLAS routine + using Routine = std::function<StatusCode(const Arguments<T>&, + const Buffer&, const Buffer&, + CommandQueue&)>; + + // Constructor, initializes the base class tester and input data + TestXY(const size_t platform_id, const size_t device_id, + const std::string &name, const std::vector<std::string> &options, + const Routine clblast_lambda, const Routine clblas_lambda); + + // The test functions, taking no inputs + void TestRegular(Arguments<T> &args, const std::string &name); + void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name); + + private: + + // Source data to test with + std::vector<T> x_source_; + std::vector<T> y_source_; + + // The routines to test + Routine clblast_lambda_; + Routine clblas_lambda_; +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_CORRECTNESS_TESTXY_H_ +#endif diff --git a/test/performance/client.cc b/test/performance/client.cc new file mode 100644 index 00000000..ddaea0e1 --- /dev/null +++ b/test/performance/client.cc @@ -0,0 +1,295 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the common functions for the client-test environment. 
+// +// ================================================================================================= + +#include "performance/client.h" + +#include <string> +#include <vector> +#include <algorithm> +#include <chrono> + +namespace clblast { +// ================================================================================================= + +// This is the vector-vector variant of the set-up/tear-down client routine. +template <typename T> +void ClientXY(int argc, char *argv[], Routine2<T> client_routine, + const std::vector<std::string> &options) { + + // Simple command line argument parser with defaults + auto args = ParseArguments<T>(argc, argv, options); + if (args.print_help) { return; } + + // Prints the header of the output table + PrintTableHeader(args.silent, options); + + // Initializes OpenCL and the libraries + auto platform = Platform(args.platform_id); + auto device = Device(platform, kDeviceType, args.device_id); + auto context = Context(device); + auto queue = CommandQueue(context, device); + if (args.compare_clblas) { clblasSetup(); } + + // Iterates over all "num_step" values jumping by "step" each time + auto s = size_t{0}; + while(true) { + + // Computes the data sizes + auto x_size = args.n*args.x_inc + args.x_offset; + auto y_size = args.n*args.y_inc + args.y_offset; + + // Populates input host vectors with random data + std::vector<T> x_source(x_size); + std::vector<T> y_source(y_size); + PopulateVector(x_source); + PopulateVector(y_source); + + // Creates the vectors on the device + auto x_buffer = Buffer(context, CL_MEM_READ_WRITE, x_size*sizeof(T)); + auto y_buffer = Buffer(context, CL_MEM_READ_WRITE, y_size*sizeof(T)); + x_buffer.WriteBuffer(queue, x_size*sizeof(T), x_source); + y_buffer.WriteBuffer(queue, y_size*sizeof(T), y_source); + + // Runs the routine-specific code + client_routine(args, x_buffer, y_buffer, queue); + + // Makes the jump to the next step + ++s; + if (s >= args.num_steps) { break; } + args.n += args.step; + } + 
+ // Cleans-up and returns + if (args.compare_clblas) { clblasTeardown(); } +} + +// Compiles the above function +template void ClientXY<float>(int, char **, Routine2<float>, const std::vector<std::string>&); +template void ClientXY<double>(int, char **, Routine2<double>, const std::vector<std::string>&); +template void ClientXY<float2>(int, char **, Routine2<float2>, const std::vector<std::string>&); +template void ClientXY<double2>(int, char **, Routine2<double2>, const std::vector<std::string>&); + +// ================================================================================================= + +// This is the matrix-matrix-matrix variant of the set-up/tear-down client routine. +template <typename T> +void ClientABC(int argc, char *argv[], Routine3<T> client_routine, + const std::vector<std::string> &options) { + + // Simple command line argument parser with defaults + auto args = ParseArguments<T>(argc, argv, options); + if (args.print_help) { return; } + + // Prints the header of the output table + PrintTableHeader(args.silent, options); + + // Initializes OpenCL and the libraries + auto platform = Platform(args.platform_id); + auto device = Device(platform, kDeviceType, args.device_id); + auto context = Context(device); + auto queue = CommandQueue(context, device); + if (args.compare_clblas) { clblasSetup(); } + + // Computes whether or not the matrices are transposed. Note that we assume a default of + // column-major and no-transpose. If one of them is different (but not both), then rotated + // is considered true. 
+ auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose == Transpose::kYes) || + (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo); + auto b_rotated = (args.layout == Layout::kColMajor && args.b_transpose == Transpose::kYes) || + (args.layout == Layout::kRowMajor && args.b_transpose == Transpose::kNo); + auto c_rotated = (args.layout == Layout::kRowMajor); + + // Iterates over all "num_step" values jumping by "step" each time + auto s = size_t{0}; + while(true) { + + // Computes the data sizes + auto a_two = (a_rotated) ? args.m : args.k; + auto b_two = (b_rotated) ? args.k : args.n; + auto c_two = (c_rotated) ? args.m : args.n; + auto a_size = a_two * args.a_ld + args.a_offset; + auto b_size = b_two * args.b_ld + args.b_offset; + auto c_size = c_two * args.c_ld + args.c_offset; + + // Populates input host matrices with random data + std::vector<T> a_source(a_size); + std::vector<T> b_source(b_size); + std::vector<T> c_source(c_size); + PopulateVector(a_source); + PopulateVector(b_source); + PopulateVector(c_source); + + // Creates the matrices on the device + auto a_buffer = Buffer(context, CL_MEM_READ_WRITE, a_size*sizeof(T)); + auto b_buffer = Buffer(context, CL_MEM_READ_WRITE, b_size*sizeof(T)); + auto c_buffer = Buffer(context, CL_MEM_READ_WRITE, c_size*sizeof(T)); + a_buffer.WriteBuffer(queue, a_size*sizeof(T), a_source); + b_buffer.WriteBuffer(queue, b_size*sizeof(T), b_source); + c_buffer.WriteBuffer(queue, c_size*sizeof(T), c_source); + + // Runs the routine-specific code + client_routine(args, a_buffer, b_buffer, c_buffer, queue); + + // Makes the jump to the next step + ++s; + if (s >= args.num_steps) { break; } + args.m += args.step; + args.n += args.step; + args.k += args.step; + args.a_ld += args.step; + args.b_ld += args.step; + args.c_ld += args.step; + } + + // Cleans-up and returns + if (args.compare_clblas) { clblasTeardown(); } +} + +// Compiles the above function +template void ClientABC<float>(int, char 
**, Routine3<float>, const std::vector<std::string>&); +template void ClientABC<double>(int, char **, Routine3<double>, const std::vector<std::string>&); +template void ClientABC<float2>(int, char **, Routine3<float2>, const std::vector<std::string>&); +template void ClientABC<double2>(int, char **, Routine3<double2>, const std::vector<std::string>&); + +// ================================================================================================= + +// Parses all arguments available for the CLBlast client testers. Some arguments might not be +// applicable, but are searched for anyway to be able to create one common argument parser. All +// arguments have a default value in case they are not found. +template <typename T> +Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::string> &options) { + auto args = Arguments<T>{}; + auto help = std::string{"Options given/available:\n"}; + + // These are the options which are not for every client: they are optional + for (auto &o: options) { + + // Data-sizes + if (o == kArgM) { args.m = args.k = GetArgument(argc, argv, help, kArgM, 512UL); } + if (o == kArgN) { args.n = GetArgument(argc, argv, help, kArgN, 512UL); } + if (o == kArgK) { args.k = GetArgument(argc, argv, help, kArgK, 512UL); } + + // Data-layouts + if (o == kArgLayout) { args.layout = GetArgument(argc, argv, help, kArgLayout, Layout::kRowMajor); } + if (o == kArgATransp) { args.a_transpose = GetArgument(argc, argv, help, kArgATransp, Transpose::kNo); } + if (o == kArgBTransp) { args.b_transpose = GetArgument(argc, argv, help, kArgBTransp, Transpose::kNo); } + if (o == kArgSide) { args.side = GetArgument(argc, argv, help, kArgSide, Side::kLeft); } + if (o == kArgTriangle) { args.triangle = GetArgument(argc, argv, help, kArgTriangle, Triangle::kUpper); } + + // Vector arguments + if (o == kArgXInc) { args.x_inc = GetArgument(argc, argv, help, kArgXInc, size_t{1}); } + if (o == kArgYInc) { args.y_inc = GetArgument(argc, argv, help, 
kArgYInc, size_t{1}); } + if (o == kArgXOffset) { args.x_offset = GetArgument(argc, argv, help, kArgXOffset, size_t{0}); } + if (o == kArgYOffset) { args.y_offset = GetArgument(argc, argv, help, kArgYOffset, size_t{0}); } + + // Matrix arguments + if (o == kArgALeadDim) { args.a_ld = GetArgument(argc, argv, help, kArgALeadDim, args.k); } + if (o == kArgBLeadDim) { args.b_ld = GetArgument(argc, argv, help, kArgBLeadDim, args.n); } + if (o == kArgCLeadDim) { args.c_ld = GetArgument(argc, argv, help, kArgCLeadDim, args.n); } + if (o == kArgAOffset) { args.a_offset = GetArgument(argc, argv, help, kArgAOffset, size_t{0}); } + if (o == kArgBOffset) { args.b_offset = GetArgument(argc, argv, help, kArgBOffset, size_t{0}); } + if (o == kArgCOffset) { args.c_offset = GetArgument(argc, argv, help, kArgCOffset, size_t{0}); } + + // Scalar values + if (o == kArgAlpha) { args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar<T>()); } + if (o == kArgBeta) { args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar<T>()); } + } + + // These are the options common to all routines + args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0}); + args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0}); + args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle); + args.compare_clblas = GetArgument(argc, argv, help, kArgCompareclblas, true); + args.step = GetArgument(argc, argv, help, kArgStepSize, size_t{1}); + args.num_steps = GetArgument(argc, argv, help, kArgNumSteps, size_t{0}); + args.num_runs = GetArgument(argc, argv, help, kArgNumRuns, size_t{10}); + args.print_help = CheckArgument(argc, argv, help, kArgHelp); + args.silent = CheckArgument(argc, argv, help, kArgQuiet); + args.no_abbrv = CheckArgument(argc, argv, help, kArgNoAbbreviations); + + // Prints the chosen (or defaulted) arguments to screen. This also serves as the help message, + // which is thus always displayed (unless silence is specified). 
+ if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); } + + // Returns the arguments + return args; +} + +// ================================================================================================= + +// Creates a vector of timing results, filled with execution times of the 'main computation'. The +// timing is performed using the milliseconds chrono functions. The function returns the minimum +// value found in the vector of timing results. The return value is in milliseconds. +double TimedExecution(const size_t num_runs, std::function<void()> main_computation) { + auto timings = std::vector<double>(num_runs); + for (auto &timing: timings) { + auto start_time = std::chrono::steady_clock::now(); + + // Executes the main computation + main_computation(); + + // Records and stores the end-time + auto elapsed_time = std::chrono::steady_clock::now() - start_time; + timing = std::chrono::duration<double,std::milli>(elapsed_time).count(); + } + return *std::min_element(timings.begin(), timings.end()); +} + +// ================================================================================================= + +// Prints the header of the performance table +void PrintTableHeader(const bool silent, const std::vector<std::string> &args) { + if (!silent) { + for (auto i=size_t{0}; i<args.size(); ++i) { fprintf(stdout, "%9s ", ""); } + fprintf(stdout, " | <-- CLBlast --> | <-- clBLAS --> |\n"); + } + for (auto &argument: args) { fprintf(stdout, "%9s;", argument.c_str()); } + fprintf(stdout, "%9s;%9s;%9s;%9s;%9s;%9s\n", + "ms_1", "GFLOPS_1", "GBs_1", "ms_2", "GFLOPS_2", "GBs_2"); +} + +// Print a performance-result row +void PrintTableRow(const std::vector<size_t> &args_int, const std::vector<std::string> &args_string, + const bool no_abbrv, const double ms_clblast, const double ms_clblas, + const unsigned long long flops, const unsigned long long bytes) { + + // Computes the GFLOPS and GB/s metrics + auto gflops_clblast = (ms_clblast != 0.0) ? 
(flops*1e-6)/ms_clblast : 0; + auto gflops_clblas = (ms_clblas != 0.0) ? (flops*1e-6)/ms_clblas: 0; + auto gbs_clblast = (ms_clblast != 0.0) ? (bytes*1e-6)/ms_clblast : 0; + auto gbs_clblas = (ms_clblas != 0.0) ? (bytes*1e-6)/ms_clblas: 0; + + // Outputs the argument values + for (auto &argument: args_int) { + if (!no_abbrv && argument >= 1024*1024 && IsMultiple(argument, 1024*1024)) { + fprintf(stdout, "%8luM;", argument/(1024*1024)); + } + else if (!no_abbrv && argument >= 1024 && IsMultiple(argument, 1024)) { + fprintf(stdout, "%8luK;", argument/1024); + } + else { + fprintf(stdout, "%9lu;", argument); + } + } + for (auto &argument: args_string) { + fprintf(stdout, "%9s;", argument.c_str()); + } + + // Outputs the performance numbers + fprintf(stdout, "%9.2lf;%9.1lf;%9.1lf;%9.2lf;%9.1lf;%9.1lf\n", + ms_clblast, gflops_clblast, gbs_clblast, + ms_clblas, gflops_clblas, gbs_clblas); +} + +// ================================================================================================= +} // namespace clblast diff --git a/test/performance/client.h b/test/performance/client.h new file mode 100644 index 00000000..2b9991fe --- /dev/null +++ b/test/performance/client.h @@ -0,0 +1,85 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file provides common function declarations to be used with the test clients. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_PERFORMANCE_CLIENT_H_ +#define CLBLAST_TEST_PERFORMANCE_CLIENT_H_ + +#include <string> +#include <vector> + +// The libraries to test +#include <clBLAS.h> +#include "clblast.h" + +#include "internal/utilities.h" + +namespace clblast { +// ================================================================================================= + +// Types of devices to consider +const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL; + +// ================================================================================================= + +// Shorthand for a BLAS routine with 2 or 3 OpenCL buffers as argument +template <typename T> +using Routine2 = std::function<void(const Arguments<T>&, + const Buffer&, const Buffer&, + CommandQueue&)>; +template <typename T> +using Routine3 = std::function<void(const Arguments<T>&, + const Buffer&, const Buffer&, const Buffer&, + CommandQueue&)>; + +// ================================================================================================= + +// These are the main client functions, setting-up arguments, matrices, OpenCL buffers, etc. After +// set-up, they call the client routine, passed as argument to this function. +template <typename T> +void ClientXY(int argc, char *argv[], Routine2<T> client_routine, + const std::vector<std::string> &options); +template <typename T> +void ClientABC(int argc, char *argv[], Routine3<T> client_routine, + const std::vector<std::string> &options); + +// ================================================================================================= + +// Parses all command-line arguments, filling in the arguments structure. If no command-line +// argument is given for a particular argument, it is filled in with a default value. 
+template <typename T> +Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::string> &options); + +// Retrieves only the precision command-line argument, since the above function is templated based +// on the precision +Precision GetPrecision(int argc, char *argv[]); + +// ================================================================================================= + +// Runs a function a given number of times and returns the execution time of the shortest instance +double TimedExecution(const size_t num_runs, std::function<void()> main_computation); + +// ================================================================================================= + +// Prints the header of a performance-data table +void PrintTableHeader(const bool silent, const std::vector<std::string> &args); + +// Prints a row of performance data, including results of two libraries +void PrintTableRow(const std::vector<size_t> &args_int, const std::vector<std::string> &args_string, + const bool abbreviations, const double ms_clblast, const double ms_clblas, + const unsigned long long flops, const unsigned long long bytes); + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_PERFORMANCE_CLIENT_H_ +#endif diff --git a/test/performance/graphs/common.r b/test/performance/graphs/common.r new file mode 100644 index 00000000..4572e559 --- /dev/null +++ b/test/performance/graphs/common.r @@ -0,0 +1,189 @@ + +# ================================================================================================== +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +# project uses a tab-size of two spaces and a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> +# +# This file implements the common performance scripts, such as creating a graph. 
#
# ==================================================================================================

# Colours
black     = "#000000"
grey      = "#888888"
purplish  = "#550077" # [ 85,  0,119] lumi=26
blueish   = "#4765b1" # [ 71,101,177] lumi=100
redish    = "#d67568" # [214,117,104] lumi=136
greenish  = "#9bd4ca" # [155,212,202] lumi=199
colourset = c(blueish, redish, greenish, purplish)

# Sets the graph markers (circles, triangles, etc.)
pchs = c(15, 18, 17, 12)

# Other constants
kilo = 1024
mega = 1024*1024

# R options
options("width"=170)

# ==================================================================================================

# Constants
num_runs <- 4
devices <- c("-platform","-device")
options_string <- "-q -no_abbrv"
library_names <- c("CLBlast", "clBLAS")

# Command-line arguments: exactly two are expected (the platform and the device). A wrong usage is
# reported with a non-zero exit status such that calling scripts can detect it.
command_line <- commandArgs(trailingOnly=TRUE)
if (length(command_line) != 2) {
  print("Usage for device Z on platform Y: Rscript xxxxx.r Y Z")
  quit(status=1)
}
platform_id <- command_line[1]
device_id <- command_line[2]

# Selects the device
devices_values <- c(platform_id, device_id)
devices_string <- paste(devices, devices_values, collapse=" ")

# ==================================================================================================

# The main function: runs the client executable of a routine for every test-case and plots one
# graph per test-case into a PDF named after the routine.
#   routine_name:  name of the routine (e.g. "xgemm"), also used to find "./client_<routine_name>"
#   precision:     precision identifier (16, 32, 64, 3232 or 6464), only used here for the title
#   test_names:    list with the title of each test-case
#   test_values:   list (per test-case) of lists with the values of the client's arguments
#   test_xlabels:  list with the x-axis label of each test-case
#   test_xaxis:    list with the x-axis description of each test-case: either a column name and a
#                  log-setting, or the x-values and their tick labels
#   metric_gflops: TRUE to plot GFLOPS, FALSE to plot GB/s
# Note that the names of the client's arguments are not passed in: they are read from the global
# variable 'parameters', which the routine-specific script has to define before calling main.
main <- function(routine_name, precision, test_names, test_values,
                 test_xlabels, test_xaxis, metric_gflops) {

  # Names
  display_name <- toupper(routine_name)
  if (precision == 16) { display_name <- gsub("^X","H",display_name); }
  if (precision == 32) { display_name <- gsub("^X","S",display_name); }
  if (precision == 64) { display_name <- gsub("^X","D",display_name); }
  if (precision == 3232) { display_name <- gsub("^X","C",display_name); }
  if (precision == 6464) { display_name <- gsub("^X","Z",display_name); }
  executable <- paste("./client_", routine_name, sep="")

  # Configures the outputfile. The device is closed again when this function exits (also in case
  # of an error), such that the PDF is always properly finalized.
  pdf(paste(display_name, ".pdf", sep=""), height=8, width=13)
  on.exit(dev.off(), add=TRUE)
  par(mfrow=c(2, 3))
  par(oma=c(0, 0, 0, 0))
  par(mar=c(4.6, 4.4, 1.5, 0)) # bottom, left, top, right [c(5.1, 4.1, 4.1, 2.1)]
  par(mgp=c(2.8, 0.6, 0)) # location of xlab/ylab, tick-mark labels, tick marks [c(3, 1, 0)]

  # Loops over the test-cases
  for (test_id in seq_along(test_names)) {
    params_values <- test_values[[test_id]]

    # Loops over the commands within a single list (within a case)
    for (command_id in seq_along(params_values)) {

      # Runs the client and captures the result
      params_string <- paste(parameters, params_values[[command_id]], collapse=" ")
      arguments <- paste(devices_string, params_string, options_string, sep=" ")
      result_string <- system2(command=executable, args=arguments, stdout=TRUE)

      # Reads the result into a dataframe
      command_db <- read.csv(text=result_string, sep=";")

      # Append the results to the final dataframe
      if (command_id == 1) {
        db <- command_db
      } else {
        db <- rbind(db, command_db)
      }
    }
    print(db)

    # Sets the values on the x-axis and their labels (test dependent): a character entry is the
    # name of the result column to use, otherwise the values and labels are given explicitly
    if (is.character(test_xaxis[[test_id]][[1]])) {
      xdata <- db[,test_xaxis[[test_id]][[1]]]
      xtics <- xdata
      log_scale <- test_xaxis[[test_id]][[2]]
    } else {
      xdata <- test_xaxis[[test_id]][[1]]
      xtics <- test_xaxis[[test_id]][[2]]
      log_scale <- ""
    }

    # Plots the graph with GFLOPS on the Y-axis
    if (metric_gflops) {
      plot_graph(xdata=xdata, ydata=list(db$GFLOPS_1, db$GFLOPS_2), log_setting=log_scale,
                 xmin=min(xdata), xmax=max(xdata),
                 ymin=0, ymax=max(max(db$GFLOPS_1),max(db$GFLOPS_2)),
                 xtics=xtics,
                 xlabel=test_xlabels[[test_id]], ylabel="GFLOPS (higher is better)",
                 graph_title=paste(display_name, test_names[[test_id]], sep=" "),
                 multiple=50, experiment_names=library_names)
    # Plots the graph with GB/s on the Y-axis
    } else {
      plot_graph(xdata=xdata, ydata=list(db$GBs_1, db$GBs_2), log_setting=log_scale,
                 xmin=min(xdata), xmax=max(xdata),
                 ymin=0, ymax=max(max(db$GBs_1),max(db$GBs_2)),
                 xtics=xtics,
                 xlabel=test_xlabels[[test_id]], ylabel="GB/s (higher is better)",
                 graph_title=paste(display_name, test_names[[test_id]], sep=" "),
                 multiple=10, experiment_names=library_names)
    }
  }
}

# ==================================================================================================

# Plots data: draws one graph with a line per experiment (one entry of 'ydata' each).
#   xdata, ydata:         the x-values and a list with the y-values per experiment
#   log_setting:          passed as 'log' to plot, e.g. "" or "x"
#   xmin, xmax:           range of the x-axis
#   ymin, ymax:           range of the y-axis, ymax is rounded up to a multiple of 'multiple'
#   xtics:                labels of the x-axis tick-marks (placed at the positions in 'xdata')
#   xlabel, ylabel:       axis labels
#   graph_title:          title of the graph
#   multiple:             rounding granularity of the y-axis maximum
#   experiment_names:     names of the experiments for the legend
plot_graph <- function(xdata, ydata, log_setting,
                       xmin, xmax, ymin, ymax,
                       xtics, xlabel, ylabel,
                       graph_title,
                       multiple, experiment_names) {

  # Update the ymax to the next multiple of something
  ymax <- multiple*ceiling(ymax/multiple)

  # Add kilo or mega to the x-labels. Labels which are not numeric (e.g. "row,n,n") are left
  # untouched: the coercion warning for those is expected and therefore suppressed.
  for (i in seq_along(xtics)) {
    value <- suppressWarnings(as.numeric(xtics[i]))
    if (!is.na(value)) {
      if (value%%mega == 0) {
        xtics[i] <- paste(as.character(value/mega), "M", sep="")
      } else if (value%%kilo == 0) {
        xtics[i] <- paste(as.character(value/kilo), "K", sep="")
      }
    }
  }

  # Creates an initial graph with axis but without data. Nothing is drawn (type "n") and the axis
  # limits are given explicitly, so the two end-points suffice: there is no need to allocate a
  # vector with every integer in between (which is huge for x-ranges up to tens of millions).
  par(new=F)
  plot(x=c(xmin, xmax), y=c(1, 1), log=log_setting,
       main="", xlab="", ylab="",
       ylim=c(ymin, ymax), xlim=c(xmin, xmax), axes=F, type="n")
  axis(side=2, las=2)
  axis(side=1, at=xdata, labels=xtics, las=2)
  title(xlab=xlabel, line=-1)
  title(ylab=ylabel, line=2)
  title(graph_title, line=-2)
  par(new=T)

  # Loops over all experiments
  for (id in seq_along(ydata)) {

    # Plots the data for this experiment (both points and lines) on top of the existing graph
    plot(x=xdata, y=ydata[[id]], log=log_setting,
         col=colourset[id], pch=pchs[id], lty=1, lwd=1, cex=1,
         xlab="", ylab="", ylim=c(ymin, ymax), xlim=c(xmin, xmax),
         axes=F, type="b", xpd=T)
    par(new=T)
  }

  # Add a legend
  legend("bottomright", experiment_names,
         lwd=1, ncol=1, col=colourset, pch=pchs, lty=1, cex=1,
         bty="n", xpd=T)

  # Done
  par(new=F)
}

# ==================================================================================================
diff --git a/test/performance/graphs/xaxpy.r b/test/performance/graphs/xaxpy.r
new file mode 100644 index 00000000..187590aa --- /dev/null +++ b/test/performance/graphs/xaxpy.r @@ -0,0 +1,96 @@ + +# ================================================================================================== +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +# project uses a tab-size of two spaces and a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> +# +# This file implements the performance script for the Xaxpy routine +# +# ================================================================================================== + +# Includes the common functions +args <- commandArgs(trailingOnly = FALSE) +thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)]))) +source(file.path(dirname(thisfile), "common.r")) + +# ================================================================================================== + +# Settings +routine_name <- "xaxpy" +parameters <- c("-n","-incx","-incy", + "-num_steps","-step","-runs","-precision") +precision <- 32 + +# Sets the names of the test-cases +test_names <- list( + "multiples of 256K", + "multiples of 256K (+1)", + "around n=1M", + "around n=16M", + "strides (n=8M)", + "powers of 2" +) + +# Defines the test-cases +test_values <- list( + list(c(256*kilo, 1, 1, 16, 256*kilo, num_runs, precision)), + list(c(256*kilo+1, 1, 1, 16, 256*kilo, num_runs, precision)), + list(c(1*mega, 1, 1, 16, 1, num_runs, precision)), + list(c(16*mega, 1, 1, 16, 1, num_runs, precision)), + list( + c(8*mega, 1, 1, 1, 0, num_runs, precision), + c(8*mega, 2, 1, 1, 0, num_runs, precision), + c(8*mega, 4, 1, 1, 0, num_runs, precision), + c(8*mega, 8, 1, 1, 0, num_runs, precision), + c(8*mega, 1, 2, 1, 0, num_runs, precision), + c(8*mega, 1, 4, 1, 0, num_runs, precision), + c(8*mega, 1, 8, 1, 0, num_runs, precision), + c(8*mega, 2, 2, 1, 0, num_runs, precision), + c(8*mega, 4, 4, 1, 0, num_runs, precision), + c(8*mega, 8, 8, 
1, 0, num_runs, precision) + ), + list( + c(32*kilo, 1, 1, 1, 0, num_runs, precision), + c(64*kilo, 1, 1, 1, 0, num_runs, precision), + c(128*kilo, 1, 1, 1, 0, num_runs, precision), + c(256*kilo, 1, 1, 1, 0, num_runs, precision), + c(512*kilo, 1, 1, 1, 0, num_runs, precision), + c(1*mega, 1, 1, 1, 0, num_runs, precision), + c(2*mega, 1, 1, 1, 0, num_runs, precision), + c(4*mega, 1, 1, 1, 0, num_runs, precision), + c(8*mega, 1, 1, 1, 0, num_runs, precision), + c(16*mega, 1, 1, 1, 0, num_runs, precision), + c(32*mega, 1, 1, 1, 0, num_runs, precision), + c(64*mega, 1, 1, 1, 0, num_runs, precision) + ) +) + +# Defines the x-labels corresponding to the test-cases +test_xlabels <- list( + "vector sizes (n)", + "vector sizes (n)", + "vector sizes (n)", + "vector sizes (n)", + "increments/strides for x and y", + "vector sizes (n)" +) + +# Defines the x-axis of the test-cases +test_xaxis <- list( + c("n", ""), + c("n", ""), + c("n", ""), + c("n", ""), + list(1:10, c("x1y1", "x2y1", "x4y1", "x8y1", "x1y2", "x1y4", "x1y8", "x2y2", "x4y4", "x8y8")), + c("n", "x") +) + +# ================================================================================================== + +# Start the script +main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values, + test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=FALSE) + +# ==================================================================================================
\ No newline at end of file diff --git a/test/performance/graphs/xgemm.r b/test/performance/graphs/xgemm.r new file mode 100755 index 00000000..22f63b77 --- /dev/null +++ b/test/performance/graphs/xgemm.r @@ -0,0 +1,94 @@ + +# ================================================================================================== +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +# project uses a tab-size of two spaces and a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> +# +# This file implements the performance script for the Xgemm routine +# +# ================================================================================================== + +# Includes the common functions +args <- commandArgs(trailingOnly = FALSE) +thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)]))) +source(file.path(dirname(thisfile), "common.r")) + +# ================================================================================================== + +# Settings +routine_name <- "xgemm" +parameters <- c("-m","-n","-k","-layout","-transA","-transB", + "-num_steps","-step","-runs","-precision") +precision <- 32 + +# Sets the names of the test-cases +test_names <- list( + "multiples of 128", + "multiples of 128 (+1)", + "around m=n=k=512", + "around m=n=k=2048", + "layouts and transposing (m=n=k=1024)", + "powers of 2" +) + +# Defines the test-cases +test_values <- list( + list(c(128, 128, 128, 0, 0, 0, 16, 128, num_runs, precision)), + list(c(129, 129, 129, 0, 0, 0, 16, 128, num_runs, precision)), + list(c(512, 512, 512, 0, 0, 0, 16, 1, num_runs, precision)), + list(c(2048, 2048, 2048, 0, 0, 0, 16, 1, num_runs, precision)), + list( + c(1024, 1024, 1024, 0, 0, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 0, 0, 1, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 0, 1, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 0, 1, 1, 1, 0, num_runs, precision), + c(1024, 
1024, 1024, 1, 0, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 1, 0, 1, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 1, 1, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 1, 1, 1, 1, 0, num_runs, precision) + ), + list( + c(8, 8, 8, 0, 0, 0, 1, 0, num_runs, precision), + c(16, 16, 16, 0, 0, 0, 1, 0, num_runs, precision), + c(32, 32, 32, 0, 0, 0, 1, 0, num_runs, precision), + c(64, 64, 64, 0, 0, 0, 1, 0, num_runs, precision), + c(128, 128, 128, 0, 0, 0, 1, 0, num_runs, precision), + c(256, 256, 256, 0, 0, 0, 1, 0, num_runs, precision), + c(512, 512, 512, 0, 0, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1024, 0, 0, 0, 1, 0, num_runs, precision), + c(2048, 2048, 2048, 0, 0, 0, 1, 0, num_runs, precision), + c(4096, 4096, 4096, 0, 0, 0, 1, 0, num_runs, precision), + c(8192, 8192, 8192, 0, 0, 0, 1, 0, num_runs, precision) + ) +) + +# Defines the x-labels corresponding to the test-cases +test_xlabels <- list( + "matrix sizes (m=n=k)", + "matrix sizes (m=n=k)", + "matrix sizes (m=n=k)", + "matrix sizes (m=n=k)", + "layout (row/col), transA (n/y), transB (n/y)", + "matrix sizes (m=n=k)" +) + +# Defines the x-axis of the test-cases +test_xaxis <- list( + c("m", ""), + c("m", ""), + c("m", ""), + c("m", ""), + list(1:8, c("row,n,n", "row,n,y", "row,y,n", "row,y,y", + "col,n,n", "col,n,y", "col,y,n", "col,y,y")), + c("m", "x") +) + +# ================================================================================================== + +# Start the script +main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values, + test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE) + +# ==================================================================================================
\ No newline at end of file diff --git a/test/performance/graphs/xsymm.r b/test/performance/graphs/xsymm.r new file mode 100644 index 00000000..6493f52a --- /dev/null +++ b/test/performance/graphs/xsymm.r @@ -0,0 +1,94 @@ + +# ================================================================================================== +# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +# project uses a tab-size of two spaces and a max-width of 100 characters per line. +# +# Author(s): +# Cedric Nugteren <www.cedricnugteren.nl> +# +# This file implements the performance script for the Xsymm routine +# +# ================================================================================================== + +# Includes the common functions +args <- commandArgs(trailingOnly = FALSE) +thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)]))) +source(file.path(dirname(thisfile), "common.r")) + +# ================================================================================================== + +# Settings +routine_name <- "xsymm" +parameters <- c("-m","-n","-layout","-triangle","-side", + "-num_steps","-step","-runs","-precision") +precision <- 32 + +# Sets the names of the test-cases +test_names <- list( + "multiples of 128", + "multiples of 128 (+1)", + "around m=n=512", + "around m=n=2048", + "layouts and triangle/side (m=n=1024)", + "powers of 2" +) + +# Defines the test-cases +test_values <- list( + list(c(128, 128, 0, 0, 0, 16, 128, num_runs, precision)), + list(c(129, 129, 0, 0, 0, 16, 128, num_runs, precision)), + list(c(512, 512, 0, 0, 0, 16, 1, num_runs, precision)), + list(c(2048, 2048, 0, 0, 0, 16, 1, num_runs, precision)), + list( + c(1024, 1024, 0, 0, 0, 1, 0, num_runs, precision), + c(1024, 1024, 0, 0, 1, 1, 0, num_runs, precision), + c(1024, 1024, 0, 1, 0, 1, 0, num_runs, precision), + c(1024, 1024, 0, 1, 1, 1, 0, num_runs, precision), + c(1024, 1024, 1, 0, 0, 1, 0, num_runs, precision), + c(1024, 
1024, 1, 0, 1, 1, 0, num_runs, precision), + c(1024, 1024, 1, 1, 0, 1, 0, num_runs, precision), + c(1024, 1024, 1, 1, 1, 1, 0, num_runs, precision) + ), + list( + c(8, 8, 0, 0, 0, 1, 0, num_runs, precision), + c(16, 16, 0, 0, 0, 1, 0, num_runs, precision), + c(32, 32, 0, 0, 0, 1, 0, num_runs, precision), + c(64, 64, 0, 0, 0, 1, 0, num_runs, precision), + c(128, 128, 0, 0, 0, 1, 0, num_runs, precision), + c(256, 256, 0, 0, 0, 1, 0, num_runs, precision), + c(512, 512, 0, 0, 0, 1, 0, num_runs, precision), + c(1024, 1024, 0, 0, 0, 1, 0, num_runs, precision), + c(2048, 2048, 0, 0, 0, 1, 0, num_runs, precision), + c(4096, 4096, 0, 0, 0, 1, 0, num_runs, precision), + c(8192, 8192, 0, 0, 0, 1, 0, num_runs, precision) + ) +) + +# Defines the x-labels corresponding to the test-cases +test_xlabels <- list( + "matrix sizes (m=n)", + "matrix sizes (m=n)", + "matrix sizes (m=n)", + "matrix sizes (m=n)", + "layout (row/col), triangle (up/lo), side (l/r)", + "matrix sizes (m=n)" +) + +# Defines the x-axis of the test-cases +test_xaxis <- list( + c("m", ""), + c("m", ""), + c("m", ""), + c("m", ""), + list(1:8, c("row,up,l", "row,up,r", "row,lo,l", "row,lo,r", + "col,up,l", "col,up,r", "col,lo,l", "col,lo,r")), + c("m", "x") +) + +# ================================================================================================== + +# Start the script +main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values, + test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE) + +# ==================================================================================================
\ No newline at end of file diff --git a/test/performance/routines/xaxpy.cc b/test/performance/routines/xaxpy.cc new file mode 100644 index 00000000..23d76099 --- /dev/null +++ b/test/performance/routines/xaxpy.cc @@ -0,0 +1,97 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xaxpy command-line interface tester. +// +// ================================================================================================= + +#include <string> +#include <vector> +#include <exception> + +#include "wrapper_clblas.h" +#include "performance/client.h" + +namespace clblast { +// ================================================================================================= + +// The client, used for performance testing. It contains the function calls to CLBlast and to other +// libraries to compare against. 
+template <typename T> +void PerformanceXaxpy(const Arguments<T> &args, + const Buffer &x_vec, const Buffer &y_vec, + CommandQueue &queue) { + + // Creates the CLBlast lambda + auto clblast_lambda = [&args, &x_vec, &y_vec, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Axpy(args.n, args.alpha, + x_vec(), args.x_offset, args.x_inc, + y_vec(), args.y_offset, args.y_inc, + &queue_plain, &event); + clWaitForEvents(1, &event); + if (status != StatusCode::kSuccess) { + throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status))); + } + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [&args, &x_vec, &y_vec, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXaxpy(args.n, args.alpha, + x_vec(), args.x_offset, args.x_inc, + y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + if (status != CL_SUCCESS) { + throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status))); + } + }; + + // Runs the routines and collect the timings + auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda); + auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda); + + // Prints the performance of both libraries + const auto flops = 2 * args.n; + const auto bytes = (3 * args.n) * sizeof(T); + const auto output_ints = std::vector<size_t>{args.n, args.x_inc, args.y_inc, + args.x_offset, args.y_offset}; + const auto output_strings = std::vector<std::string>{ToString(args.alpha)}; + PrintTableRow(output_ints, output_strings, args.no_abbrv, + ms_clblast, ms_clblas, flops, bytes); +} + +// ================================================================================================= + +// Main function which calls the common client code with the routine-specific function as argument. 
+void ClientXaxpy(int argc, char *argv[]) { + const auto o = std::vector<std::string>{kArgN, kArgXInc, kArgYInc, + kArgXOffset, kArgYOffset, kArgAlpha}; + switch(GetPrecision(argc, argv)) { + case Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case Precision::kSingle: ClientXY<float>(argc, argv, PerformanceXaxpy<float>, o); break; + case Precision::kDouble: ClientXY<double>(argc, argv, PerformanceXaxpy<double>, o); break; + case Precision::kComplexSingle: ClientXY<float2>(argc, argv, PerformanceXaxpy<float2>, o); break; + case Precision::kComplexDouble: ClientXY<double2>(argc, argv, PerformanceXaxpy<double2>, o); break; + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::ClientXaxpy(argc, argv); + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/xgemm.cc b/test/performance/routines/xgemm.cc new file mode 100644 index 00000000..234e9fdb --- /dev/null +++ b/test/performance/routines/xgemm.cc @@ -0,0 +1,115 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xgemm command-line interface tester. 
+// +// ================================================================================================= + +#include <string> +#include <vector> +#include <exception> + +#include "wrapper_clblas.h" +#include "performance/client.h" + +namespace clblast { +// ================================================================================================= + +// The client, used for performance testing. It contains the function calls to CLBlast and to other +// libraries to compare against. +template <typename T> +void PerformanceXgemm(const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) { + + // Creates the CLBlast lambda + auto clblast_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + if (status != StatusCode::kSuccess) { + throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status))); + } + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgemm(static_cast<clblasOrder>(args.layout), + static_cast<clblasTranspose>(args.a_transpose), + static_cast<clblasTranspose>(args.b_transpose), + args.m, args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + if (status != CL_SUCCESS) { + throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status))); + } + }; + + // Runs the routines and collect the timings + auto ms_clblast = 
TimedExecution(args.num_runs, clblast_lambda); + auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda); + + // Prints the performance of both libraries + const auto flops = 2 * args.m * args.n * args.k; + const auto bytes = (args.m*args.k + args.k*args.n + args.m*args.n) * sizeof(T); + const auto output_ints = std::vector<size_t>{args.m, args.n, args.k, + static_cast<size_t>(args.layout), + static_cast<size_t>(args.a_transpose), + static_cast<size_t>(args.b_transpose), + args.a_ld, args.b_ld, args.c_ld, + args.a_offset, args.b_offset, args.c_offset}; + const auto output_strings = std::vector<std::string>{ToString(args.alpha), + ToString(args.beta)}; + PrintTableRow(output_ints, output_strings, args.no_abbrv, + ms_clblast, ms_clblas, flops, bytes); +} + +// ================================================================================================= + +// Main function which calls the common client code with the routine-specific function as argument. +void ClientXgemm(int argc, char *argv[]) { + const auto o = std::vector<std::string>{kArgM, kArgN, kArgK, kArgLayout, + kArgATransp, kArgBTransp, + kArgALeadDim, kArgBLeadDim, kArgCLeadDim, + kArgAOffset, kArgBOffset, kArgCOffset, + kArgAlpha, kArgBeta}; + switch(GetPrecision(argc, argv)) { + case Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case Precision::kSingle: ClientABC<float>(argc, argv, PerformanceXgemm<float>, o); break; + case Precision::kDouble: ClientABC<double>(argc, argv, PerformanceXgemm<double>, o); break; + case Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::ClientXgemm(argc, argv); + return 0; +} + +// 
================================================================================================= diff --git a/test/performance/routines/xsymm.cc b/test/performance/routines/xsymm.cc new file mode 100644 index 00000000..13ad434a --- /dev/null +++ b/test/performance/routines/xsymm.cc @@ -0,0 +1,115 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsymm command-line interface tester. +// +// ================================================================================================= + +#include <string> +#include <vector> +#include <exception> + +#include "wrapper_clblas.h" +#include "performance/client.h" + +namespace clblast { +// ================================================================================================= + +// The client, used for performance testing. It contains the function calls to CLBlast and to other +// libraries to compare against. 
+template <typename T> +void PerformanceXsymm(const Arguments<T> &args, + const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat, + CommandQueue &queue) { + + // Creates the CLBlast lambda + auto clblast_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Symm(args.layout, args.side, args.triangle, + args.m, args.n, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + if (status != StatusCode::kSuccess) { + throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status))); + } + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsymm(static_cast<clblasOrder>(args.layout), + static_cast<clblasSide>(args.side), + static_cast<clblasUplo>(args.triangle), + args.m, args.n, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + b_mat(), args.b_offset, args.b_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + if (status != CL_SUCCESS) { + throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status))); + } + }; + + // Runs the routines and collect the timings + auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda); + auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda); + + // Prints the performance of both libraries + const auto flops = 2 * args.m * args.n * args.m; + const auto bytes = (args.m*args.m + args.m*args.n + args.m*args.n) * sizeof(T); + const auto output_ints = std::vector<size_t>{args.m, args.n, + static_cast<size_t>(args.layout), + static_cast<size_t>(args.triangle), + static_cast<size_t>(args.side), + args.a_ld, args.b_ld, args.c_ld, + args.a_offset, args.b_offset, 
args.c_offset}; + const auto output_strings = std::vector<std::string>{ToString(args.alpha), + ToString(args.beta)}; + PrintTableRow(output_ints, output_strings, args.no_abbrv, + ms_clblast, ms_clblas, flops, bytes); +} + +// ================================================================================================= + +// Main function which calls the common client code with the routine-specific function as argument. +void ClientXsymm(int argc, char *argv[]) { + const auto o = std::vector<std::string>{kArgM, kArgN, kArgLayout, + kArgTriangle, kArgSide, + kArgALeadDim, kArgBLeadDim, kArgCLeadDim, + kArgAOffset, kArgBOffset, kArgCOffset, + kArgAlpha, kArgBeta}; + switch(GetPrecision(argc, argv)) { + case Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case Precision::kSingle: ClientABC<float>(argc, argv, PerformanceXsymm<float>, o); break; + case Precision::kDouble: ClientABC<double>(argc, argv, PerformanceXsymm<double>, o); break; + case Precision::kComplexSingle: throw std::runtime_error("Unsupported precision mode"); + case Precision::kComplexDouble: throw std::runtime_error("Unsupported precision mode"); + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::ClientXsymm(argc, argv); + return 0; +} + +// ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h new file mode 100644 index 00000000..7c71fcaa --- /dev/null +++ b/test/wrapper_clblas.h @@ -0,0 +1,216 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under the MIT license. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a wrapper around the clBLAS library, such that its routines can be called +// in a similar way as the CLBlast routines: using alpha and beta to determine the precision. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_WRAPPER_CLBLAS_H_ +#define CLBLAST_TEST_WRAPPER_CLBLAS_H_ + +#include <clBLAS.h> + +#include "internal/utilities.h" + +namespace clblast { +// ================================================================================================= +// BLAS level-1 (vector-vector) routines + +// Calls {clblasSaxpy, clblasDaxpy, clblasCaxpy, clblasZaxpy} with the arguments forwarded. +clblasStatus clblasXaxpy( + size_t n, float alpha, + const cl_mem x_vec, size_t x_offset, size_t x_inc, + const cl_mem y_vec, size_t y_offset, size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSaxpy(n, alpha, + x_vec, x_offset, static_cast<int>(x_inc), + y_vec, y_offset, static_cast<int>(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy( + size_t n, double alpha, + const cl_mem x_vec, size_t x_offset, size_t x_inc, + const cl_mem y_vec, size_t y_offset, size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDaxpy(n, alpha, + x_vec, x_offset, static_cast<int>(x_inc), + y_vec, y_offset, static_cast<int>(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy( + size_t n, float2 alpha, + const cl_mem x_vec, size_t x_offset, size_t x_inc, + const cl_mem y_vec, size_t y_offset, size_t 
y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; + return clblasCaxpy(n, cl_alpha, + x_vec, x_offset, static_cast<int>(x_inc), + y_vec, y_offset, static_cast<int>(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXaxpy( + size_t n, double2 alpha, + const cl_mem x_vec, size_t x_offset, size_t x_inc, + const cl_mem y_vec, size_t y_offset, size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; + return clblasZaxpy(n, cl_alpha, + x_vec, x_offset, static_cast<int>(x_inc), + y_vec, y_offset, static_cast<int>(y_inc), + num_queues, queues, num_wait_events, wait_events, events); +} + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines + +// This calls {clblasSgemm, clblasDgemm, clblasCgemm, clblasZgemm} with the arguments forwarded. 
+clblasStatus clblasXgemm( + clblasOrder layout, clblasTranspose tran_a, clblasTranspose tran_b, + size_t m, size_t n, size_t k, float alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSgemm(layout, tran_a, tran_b, + m, n, k, alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemm( + clblasOrder layout, clblasTranspose tran_a, clblasTranspose tran_b, + size_t m, size_t n, size_t k, double alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDgemm(layout, tran_a, tran_b, + m, n, k, alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXgemm( + clblasOrder layout, clblasTranspose tran_a, clblasTranspose tran_b, + size_t m, size_t n, size_t k, float2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; + return clblasCgemm(layout, tran_a, tran_b, + m, n, k, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, 
num_wait_events, wait_events, events); +} +clblasStatus clblasXgemm( + clblasOrder layout, clblasTranspose tran_a, clblasTranspose tran_b, + size_t m, size_t n, size_t k, double2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; + return clblasZgemm(layout, tran_a, tran_b, + m, n, k, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} + +// This calls {clblasSsymm, clblasDsymm} with the arguments forwarded. +clblasStatus clblasXsymm( + clblasOrder layout, clblasSide side, clblasUplo triangle, + size_t m, size_t n, float alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasSsymm(layout, side, triangle, + m, n, alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm( + clblasOrder layout, clblasSide side, clblasUplo triangle, + size_t m, size_t n, double alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasDsymm(layout, side, triangle, + m, n, alpha, + a_mat, a_offset, a_ld, + b_mat, 
b_offset, b_ld, beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm( + clblasOrder layout, clblasSide side, clblasUplo triangle, + size_t m, size_t n, float2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, float2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_float2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_float2{{beta.real(), beta.imag()}}; + return clblasCsymm(layout, side, triangle, + m, n, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} +clblasStatus clblasXsymm( + clblasOrder layout, clblasSide side, clblasUplo triangle, + size_t m, size_t n, double2 alpha, + const cl_mem a_mat, size_t a_offset, size_t a_ld, + const cl_mem b_mat, size_t b_offset, size_t b_ld, double2 beta, + cl_mem c_mat, size_t c_offset, size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto cl_alpha = cl_double2{{alpha.real(), alpha.imag()}}; + auto cl_beta = cl_double2{{beta.real(), beta.imag()}}; + return clblasZsymm(layout, side, triangle, + m, n, cl_alpha, + a_mat, a_offset, a_ld, + b_mat, b_offset, b_ld, cl_beta, + c_mat, c_offset, c_ld, + num_queues, queues, num_wait_events, wait_events, events); +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_WRAPPER_CLBLAS_H_ +#endif |