diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-04-20 22:14:55 -0600 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-04-20 22:14:55 -0600 |
commit | c8e28a33c0b786aa3772b6f6d822245b5f2ad9ea (patch) | |
tree | bd7df539c92b92cb8841ff27fd83c4e14e5958bc /test | |
parent | a61724ece50ab895a67bc15ae3a132d0ecbe61bc (diff) | |
parent | 16a048f1ac8102ad4bcce9cf9fc320f791323e45 (diff) |
Merge branch 'level1_routines' into development
Diffstat (limited to 'test')
-rw-r--r-- | test/correctness/routines/level1/xamax.cc | 28 | ||||
-rw-r--r-- | test/correctness/routines/level1/xasum.cc | 28 | ||||
-rw-r--r-- | test/correctness/testblas.h | 16 | ||||
-rw-r--r-- | test/performance/client.cc | 7 | ||||
-rw-r--r-- | test/performance/routines/level1/xamax.cc | 35 | ||||
-rw-r--r-- | test/performance/routines/level1/xasum.cc | 35 | ||||
-rw-r--r-- | test/routines/level1/xamax.h | 139 | ||||
-rw-r--r-- | test/routines/level1/xasum.h | 139 | ||||
-rw-r--r-- | test/wrapper_cblas.h | 52 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 136 |
10 files changed, 610 insertions, 5 deletions
diff --git a/test/correctness/routines/level1/xamax.cc b/test/correctness/routines/level1/xamax.cc new file mode 100644 index 00000000..ade09e7a --- /dev/null +++ b/test/correctness/routines/level1/xamax.cc @@ -0,0 +1,28 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xamax.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX"); + clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX"); + clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX"); + clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/routines/level1/xasum.cc b/test/correctness/routines/level1/xasum.cc new file mode 100644 index 00000000..5ec20596 --- /dev/null +++ b/test/correctness/routines/level1/xasum.cc @@ -0,0 +1,28 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xasum.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXasum<float>, float, float>(argc, argv, false, "SASUM"); + clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM"); + clblast::RunTests<clblast::TestXasum<float2>, float2, float2>(argc, argv, true, "ScASUM"); + clblast::RunTests<clblast::TestXasum<double2>, double2, double2>(argc, argv, true, "DzASUM"); + return 0; +} + +// ================================================================================================= diff --git a/test/correctness/testblas.h b/test/correctness/testblas.h index 8181aaf6..13be921a 100644 --- a/test/correctness/testblas.h +++ b/test/correctness/testblas.h @@ -153,6 +153,8 @@ void RunTests(int argc, char *argv[], const bool silent, const std::string &name auto ap_offsets = std::vector<size_t>{args.ap_offset}; auto dot_offsets = std::vector<size_t>{args.dot_offset}; auto nrm2_offsets = std::vector<size_t>{args.nrm2_offset}; + auto asum_offsets = std::vector<size_t>{args.asum_offset}; + auto imax_offsets = std::vector<size_t>{args.imax_offset}; auto alphas = std::vector<U>{args.alpha}; auto betas = std::vector<U>{args.beta}; auto x_sizes = std::vector<size_t>{args.x_size}; @@ -193,6 +195,8 @@ void RunTests(int argc, char *argv[], const bool silent, const std::string &name if (option == kArgAPOffset) { ap_offsets = tester.kOffsets; } if (option == kArgDotOffset) { dot_offsets = tester.kOffsets; } if 
(option == kArgNrm2Offset) { nrm2_offsets = tester.kOffsets; } + if (option == kArgAsumOffset) { asum_offsets = tester.kOffsets; } + if (option == kArgImaxOffset) { imax_offsets = tester.kOffsets; } if (option == kArgAlpha) { alphas = tester.kAlphaValues; } if (option == kArgBeta) { betas = tester.kBetaValues; } @@ -233,10 +237,14 @@ void RunTests(int argc, char *argv[], const bool silent, const std::string &name for (auto &ap_offset: ap_offsets) { r_args.ap_offset = ap_offset; for (auto &dot_offset: dot_offsets) { r_args.dot_offset = dot_offset; for (auto &nrm2_offset: nrm2_offsets) { r_args.nrm2_offset = nrm2_offset; - for (auto &alpha: alphas) { r_args.alpha = alpha; - for (auto &beta: betas) { r_args.beta = beta; - C::SetSizes(r_args); - regular_test_vector.push_back(r_args); + for (auto &asum_offset: asum_offsets) { r_args.asum_offset = asum_offset; + for (auto &imax_offset: imax_offsets) { r_args.imax_offset = imax_offset; + for (auto &alpha: alphas) { r_args.alpha = alpha; + for (auto &beta: betas) { r_args.beta = beta; + C::SetSizes(r_args); + regular_test_vector.push_back(r_args); + } + } } } } diff --git a/test/performance/client.cc b/test/performance/client.cc index 56ab8c8d..9aaf1e4e 100644 --- a/test/performance/client.cc +++ b/test/performance/client.cc @@ -80,8 +80,11 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const GetMetric if (o == kArgCOffset) { args.c_offset = GetArgument(argc, argv, help, kArgCOffset, size_t{0}); } if (o == kArgAPOffset) { args.ap_offset= GetArgument(argc, argv, help, kArgAPOffset, size_t{0}); } - // Dot arguments + // Scalar result arguments if (o == kArgDotOffset) { args.dot_offset = GetArgument(argc, argv, help, kArgDotOffset, size_t{0}); } + if (o == kArgNrm2Offset) { args.nrm2_offset = GetArgument(argc, argv, help, kArgNrm2Offset, size_t{0}); } + if (o == kArgAsumOffset) { args.asum_offset = GetArgument(argc, argv, help, kArgAsumOffset, size_t{0}); } + if (o == kArgImaxOffset) { args.imax_offset = 
GetArgument(argc, argv, help, kArgImaxOffset, size_t{0}); } // Scalar values if (o == kArgAlpha) { args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar<U>()); } @@ -292,6 +295,8 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args, else if (o == kArgAPOffset) { integers.push_back(args.ap_offset); } else if (o == kArgDotOffset) {integers.push_back(args.dot_offset); } else if (o == kArgNrm2Offset){integers.push_back(args.nrm2_offset); } + else if (o == kArgAsumOffset){integers.push_back(args.asum_offset); } + else if (o == kArgImaxOffset){integers.push_back(args.imax_offset); } } auto strings = std::vector<std::string>{}; for (auto &o: options_) { diff --git a/test/performance/routines/level1/xamax.cc b/test/performance/routines/level1/xamax.cc new file mode 100644 index 00000000..85caa483 --- /dev/null +++ b/test/performance/routines/level1/xamax.cc @@ -0,0 +1,35 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xamax.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXamax<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: + clblast::RunClient<clblast::TestXamax<float2>, float2, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: + clblast::RunClient<clblast::TestXamax<double2>, double2, double2>(argc, argv); break; + } + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xasum.cc b/test/performance/routines/level1/xasum.cc new file mode 100644 index 00000000..2680966e --- /dev/null +++ b/test/performance/routines/level1/xasum.cc @@ -0,0 +1,35 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xasum.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXasum<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXasum<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: + clblast::RunClient<clblast::TestXasum<float2>, float2, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: + clblast::RunClient<clblast::TestXasum<double2>, double2, double2>(argc, argv); break; + } + return 0; +} + +// ================================================================================================= diff --git a/test/routines/level1/xamax.h b/test/routines/level1/xamax.h new file mode 100644 index 00000000..7b404dc3 --- /dev/null +++ b/test/routines/level1/xamax.h @@ -0,0 +1,139 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a class with static methods to describe the Xamax routine. 
Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XAMAX_H_ +#define CLBLAST_TEST_ROUTINES_XAMAX_H_ + +#include <vector> +#include <string> + +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class TestXamax { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 1; } + + // The list of arguments relevant for this routine + static std::vector<std::string> GetOptions() { + return {kArgN, + kArgXInc, + kArgXOffset, kArgImaxOffset}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments<T> &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeImax(const Arguments<T> &args) { + return 1 + args.imax_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments<T> &args) { + args.x_size = GetSizeX(args); + args.scalar_size = GetSizeImax(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector<Transpose>; + static Transposes GetATransposes(const Transposes &) { return {}; } 
// N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Amax<T>(args.n, + buffers.scalar(), args.imax_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXamax<T>(args.n, + buffers.scalar(), args.imax_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast<StatusCode>(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0)); + std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXamax(args.n, + scalar_cpu, args.imax_offset, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> result(args.scalar_size, static_cast<T>(0)); + buffers.scalar.Read(queue, args.scalar_size, result); + 
return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments<T> &args, const size_t, const size_t) { + return args.imax_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments<T> &args) { + return args.n; + } + static size_t GetBytes(const Arguments<T> &args) { + return ((args.n) + 1) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XAMAX_H_ +#endif diff --git a/test/routines/level1/xasum.h b/test/routines/level1/xasum.h new file mode 100644 index 00000000..6eae3c83 --- /dev/null +++ b/test/routines/level1/xasum.h @@ -0,0 +1,139 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a class with static methods to describe the Xasum routine. Examples of +// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XASUM_H_ +#define CLBLAST_TEST_ROUTINES_XASUM_H_ + +#include <vector> +#include <string> + +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class TestXasum { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 1; } + + // The list of arguments relevant for this routine + static std::vector<std::string> GetOptions() { + return {kArgN, + kArgXInc, + kArgXOffset, kArgAsumOffset}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments<T> &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeAsum(const Arguments<T> &args) { + return 1 + args.asum_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments<T> &args) { + args.x_size = GetSizeX(args); + args.scalar_size = GetSizeAsum(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector<Transpose>; + static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode 
RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Asum<T>(args.n, + buffers.scalar(), args.asum_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + clWaitForEvents(1, &event); + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXasum<T>(args.n, + buffers.scalar(), args.asum_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast<StatusCode>(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0)); + std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXasum(args.n, + scalar_cpu, args.asum_offset, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> result(args.scalar_size, static_cast<T>(0)); + buffers.scalar.Read(queue, args.scalar_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t 
ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments<T> &args, const size_t, const size_t) { + return args.asum_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments<T> &args) { + return args.n; + } + static size_t GetBytes(const Arguments<T> &args) { + return ((args.n) + 1) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XASUM_H_ +#endif diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h index dec272b0..994b48b1 100644 --- a/test/wrapper_cblas.h +++ b/test/wrapper_cblas.h @@ -345,6 +345,58 @@ void cblasXnrm2(const size_t n, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } +// Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM +void cblasXasum(const size_t n, + std::vector<float>& asum_buffer, const size_t asum_offset, + const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { + asum_buffer[asum_offset] = cblas_sasum(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXasum(const size_t n, + std::vector<double>& asum_buffer, const size_t asum_offset, + const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { + asum_buffer[asum_offset] = cblas_dasum(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXasum(const size_t n, + std::vector<float2>& asum_buffer, const size_t asum_offset, + const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { + asum_buffer[asum_offset] = cblas_scasum(n, + reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} +void cblasXasum(const size_t n, + std::vector<double2>& asum_buffer, const size_t asum_offset, + const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { + asum_buffer[asum_offset] = 
cblas_dzasum(n, + reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} + +// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX +void cblasXamax(const size_t n, + std::vector<float>& imax_buffer, const size_t imax_offset, + const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_isamax(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<double>& imax_buffer, const size_t imax_offset, + const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_idamax(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<float2>& imax_buffer, const size_t imax_offset, + const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_icamax(n, + reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<double2>& imax_buffer, const size_t imax_offset, + const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_izamax(n, + reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 89b708b8..a44466c6 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -558,6 +558,142 @@ clblasStatus clblasXnrm2<double2>(const size_t n, num_queues, queues, num_wait_events, wait_events, events); } +// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM +template <typename T> +clblasStatus clblasXasum(const size_t n, + cl_mem 
asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXasum<float>(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float>(context, n); + return clblasSasum(n, + asum_buffer, asum_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXasum<double>(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double>(context, n); + return clblasDasum(n, + asum_buffer, asum_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXasum<float2>(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float2>(context, n); + return clblasScasum(n, + asum_buffer, asum_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + 
num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXasum<double2>(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double2>(context, n); + return clblasDzasum(n, + asum_buffer, asum_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} + +// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX +template <typename T> +clblasStatus clblasXamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXamax<float>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float>(context, 2*n); + return clblasiSamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<double>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto 
context = queue.GetContext(); + auto scratch_buffer = Buffer<double>(context, 2*n); + return clblasiDamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<float2>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float2>(context, 2*n); + return clblasiCamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<double2>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double2>(context, 2*n); + return clblasiZamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= |