diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-04 15:21:33 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-03-04 15:21:33 +0100 |
commit | e993ee077b50d3a6134309d465a4174b5c749596 (patch) | |
tree | b967f2702b90d8080a3e3cb41b9cbc01ab9eddc3 | |
parent | 3fc73851f7ed885335940eb85e53069638567323 (diff) |
Added a proper data-preparation function for the TRSM tests
51 files changed, 484 insertions, 88 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl index c59bcbcb..55f4a963 100644 --- a/src/kernels/level3/invert_diagonal_blocks.opencl +++ b/src/kernels/level3/invert_diagonal_blocks.opencl @@ -140,7 +140,9 @@ void InvertDiagonalBlock(int n, __global const real* restrict src, const int src for (int k = j + 1; k < INTERNAL_BLOCK_SIZE; ++k) { MultiplyAdd(sum, lm[thread_index][k], lm[k][j]); } - Multiply(lm[thread_index][j], -lm[j][j], sum); + real diagonal_value = lm[j][j]; + Negate(diagonal_value); + Multiply(lm[thread_index][j], diagonal_value, sum); } barrier(CLK_LOCAL_MEM_FENCE); } diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index 9cf75490..d68cc1a6 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -55,29 +55,35 @@ template <> half ConstantNegOne() { return FloatToHalf(-1.0f); } template <> float2 ConstantNegOne() { return {-1.0f, 0.0f}; } template <> double2 ConstantNegOne() { return {-1.0, 0.0}; } -// Returns a scalar of value 1 -template <typename T> T ConstantTwo() { return static_cast<T>(2.0); } -template float ConstantTwo<float>(); -template double ConstantTwo<double>(); -template <> half ConstantTwo() { return FloatToHalf(2.0f); } -template <> float2 ConstantTwo() { return {2.0f, 0.0f}; } -template <> double2 ConstantTwo() { return {2.0, 0.0}; } +// Returns a scalar of some value +template <typename T> T Constant(const double val) { return static_cast<T>(val); } +template float Constant<float>(const double); +template double Constant<double>(const double); +template <> half Constant(const double val) { return FloatToHalf(static_cast<float>(val)); } +template <> float2 Constant(const double val) { return {static_cast<float>(val), 0.0f}; } +template <> double2 Constant(const double val) { return {val, 0.0}; } // Returns a small scalar value just larger than 0 -template <typename T> T SmallConstant() { return static_cast<T>(1e7); } +template 
<typename T> T SmallConstant() { return static_cast<T>(1e-4); } template float SmallConstant<float>(); template double SmallConstant<double>(); -template <> half SmallConstant() { return FloatToHalf(1e7); } -template <> float2 SmallConstant() { return {1e7, 0.0f}; } -template <> double2 SmallConstant() { return {1e7, 0.0}; } +template <> half SmallConstant() { return FloatToHalf(1e-4); } +template <> float2 SmallConstant() { return {1e-4, 0.0f}; } +template <> double2 SmallConstant() { return {1e-4, 0.0}; } -// Returns the absolute value of a scalar -template <typename T> T AbsoluteValue(const T value) { return std::fabs(value); } +// Returns the absolute value of a scalar (modulus in case of a complex number) +template <typename T> typename BaseType<T>::Type AbsoluteValue(const T value) { return std::fabs(value); } template float AbsoluteValue<float>(const float); template double AbsoluteValue<double>(const double); template <> half AbsoluteValue(const half value) { return FloatToHalf(std::fabs(HalfToFloat(value))); } -template <> float2 AbsoluteValue(const float2 value) { return std::abs(value); } -template <> double2 AbsoluteValue(const double2 value) { return std::abs(value); } +template <> float AbsoluteValue(const float2 value) { + if (value.real() == 0.0f && value.imag() == 0.0f) { return 0.0f; } + return std::sqrt(value.real() * value.real() + value.imag() * value.imag()); +} +template <> double AbsoluteValue(const double2 value) { + if (value.real() == 0.0 && value.imag() == 0.0) { return 0.0; } + return std::sqrt(value.real() * value.real() + value.imag() * value.imag()); +} // Returns whether a scalar is close to zero template <typename T> bool IsCloseToZero(const T value) { return (value > -SmallConstant<T>()) && (value < SmallConstant<T>()); } diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 044955ea..3c9be6a2 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -98,6 +98,13 @@ constexpr auto 
kArgNoAbbreviations = "no_abbrv"; // ================================================================================================= +// Converts a regular or complex type to it's base type (e.g. float2 to float) +template <typename T> struct BaseType { using Type = T; }; +template <> struct BaseType<float2> { using Type = float; }; +template <> struct BaseType<double2> { using Type = double; }; + +// ================================================================================================= + // Returns a scalar with a default value template <typename T> T GetScalar(); @@ -105,11 +112,11 @@ template <typename T> T GetScalar(); template <typename T> T ConstantZero(); template <typename T> T ConstantOne(); template <typename T> T ConstantNegOne(); -template <typename T> T ConstantTwo(); +template <typename T> T Constant(const double val); template <typename T> T SmallConstant(); -// Returns the absolute value of a scalar -template <typename T> T AbsoluteValue(const T value); +// Returns the absolute value of a scalar (modulus in case of complex numbers) +template <typename T> typename BaseType<T>::Type AbsoluteValue(const T value); // Returns whether a scalar is close to zero template <typename T> bool IsCloseToZero(const T value); diff --git a/test/correctness/testblas.cpp b/test/correctness/testblas.cpp index 5207c0ab..d959ce18 100644 --- a/test/correctness/testblas.cpp +++ b/test/correctness/testblas.cpp @@ -51,6 +51,7 @@ template <> const std::vector<Transpose> TestBlas<double2,double>::kTransposes = template <typename T, typename U> TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool silent, const std::string &name, const std::vector<std::string> &options, + const DataPrepare prepare_data, const Routine run_routine, const Routine run_reference1, const Routine run_reference2, const ResultGet get_result, const ResultIndex get_index, @@ -59,6 +60,7 @@ TestBlas<T,U>::TestBlas(const std::vector<std::string> &arguments, const bool si 
kOffsets(GetOffsets()), kAlphaValues(GetExampleScalars<U>(full_test_)), kBetaValues(GetExampleScalars<U>(full_test_)), + prepare_data_(prepare_data), run_routine_(run_routine), get_result_(get_result), get_index_(get_index), @@ -112,6 +114,11 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st std::cout << std::flush; } + // Optionally prepares the input data + prepare_data_(args, queue_, kSeed, + x_source_, y_source_, a_source_, b_source_, c_source_, + ap_source_, scalar_source_); + // Set-up for the CLBlast run auto x_vec2 = Buffer<T>(context_, args.x_size); auto y_vec2 = Buffer<T>(context_, args.y_size); diff --git a/test/correctness/testblas.hpp b/test/correctness/testblas.hpp index 27fd84c3..ee795aad 100644 --- a/test/correctness/testblas.hpp +++ b/test/correctness/testblas.hpp @@ -74,6 +74,10 @@ class TestBlas: public Tester<T,U> { static const std::vector<Transpose> kTransposes; // Data-type dependent, see .cc-file // Shorthand for the routine-specific functions passed to the tester + using DataPrepare = std::function<void(const Arguments<U>&, Queue&, const int, + std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&)>; using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>; using ResultGet = std::function<std::vector<T>(const Arguments<U>&, Buffers<T>&, Queue&)>; using ResultIndex = std::function<size_t(const Arguments<U>&, const size_t, const size_t)>; @@ -82,6 +86,7 @@ class TestBlas: public Tester<T,U> { // Constructor, initializes the base class tester and input data TestBlas(const std::vector<std::string> &arguments, const bool silent, const std::string &name, const std::vector<std::string> &options, + const DataPrepare prepare_data, const Routine run_routine, const Routine run_reference1, const Routine run_reference2, const ResultGet get_result, const ResultIndex get_index, @@ -103,6 +108,7 @@ class TestBlas: public 
Tester<T,U> { std::vector<T> scalar_source_; // The routine-specific functions passed to the tester + DataPrepare prepare_data_; Routine run_routine_; Routine run_reference_; ResultGet get_result_; @@ -141,7 +147,7 @@ size_t RunTests(int argc, char *argv[], const bool silent, const std::string &na // Creates a tester auto options = C::GetOptions(); TestBlas<T,U> tester{command_line_args, silent, name, options, - C::RunRoutine, reference_routine1, reference_routine2, + C::PrepareData, C::RunRoutine, reference_routine1, reference_routine2, C::DownloadResult, C::GetResultIndex, C::ResultID1, C::ResultID2}; // This variable holds the arguments relevant for this routine diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index 046473f8..cbfc5bb2 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -365,6 +365,8 @@ std::string Tester<T,U>::GetOptionsString(const Arguments<U> &args) { if (o == kArgCOffset) { result += kArgCOffset + equals + ToString(args.c_offset) + " "; } if (o == kArgAPOffset) { result += kArgAPOffset + equals + ToString(args.ap_offset) + " "; } if (o == kArgDotOffset){ result += kArgDotOffset + equals + ToString(args.dot_offset) + " "; } + if (o == kArgAlpha) { result += kArgAlpha + equals + ToString(args.alpha) + " "; } + if (o == kArgBeta) { result += kArgBeta + equals + ToString(args.beta) + " "; } } return result; } diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp index f98bdb06..a22f681f 100644 --- a/test/routines/level1/xamax.hpp +++ b/test/routines/level1/xamax.hpp @@ -68,6 +68,11 @@ class TestXamax { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, 
std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp index 64aa37c2..64377189 100644 --- a/test/routines/level1/xasum.hpp +++ b/test/routines/level1/xasum.hpp @@ -68,6 +68,11 @@ class TestXasum { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index b24e6fe8..eba067c0 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -69,6 +69,11 @@ class TestXaxpy { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp index 
87bc21d4..753f0da5 100644 --- a/test/routines/level1/xcopy.hpp +++ b/test/routines/level1/xcopy.hpp @@ -68,6 +68,11 @@ class TestXcopy { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp index c4f6076a..8127247d 100644 --- a/test/routines/level1/xdot.hpp +++ b/test/routines/level1/xdot.hpp @@ -72,6 +72,11 @@ class TestXdot { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp index aae892a8..96d97dc4 100644 --- a/test/routines/level1/xdotc.hpp +++ b/test/routines/level1/xdotc.hpp @@ -72,6 +72,11 @@ class TestXdotc { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to 
prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp index f6be385b..70c7fceb 100644 --- a/test/routines/level1/xdotu.hpp +++ b/test/routines/level1/xdotu.hpp @@ -72,6 +72,11 @@ class TestXdotu { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp index e604077c..ce33fe59 100644 --- a/test/routines/level1/xnrm2.hpp +++ b/test/routines/level1/xnrm2.hpp @@ -68,6 +68,11 @@ class TestXnrm2 { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> 
&args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp index 3c438bd6..d89688b4 100644 --- a/test/routines/level1/xscal.hpp +++ b/test/routines/level1/xscal.hpp @@ -65,6 +65,11 @@ class TestXscal { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp index a0491f12..49b0d3d0 100644 --- a/test/routines/level1/xswap.hpp +++ b/test/routines/level1/xswap.hpp @@ -68,6 +68,11 @@ class TestXswap { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index 5ed92aae..f371b9a7 100644 --- a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -80,6 +80,11 @@ class TestXgbmv { static Transposes GetATransposes(const 
Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index 9ee6d535..2442be4c 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -80,6 +80,11 @@ class TestXgemv { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 42283107..3e7ccbc3 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -76,6 +76,11 @@ class TestXger { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} 
// N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index ef69c197..d880ae1f 100644 --- a/test/routines/level2/xgerc.hpp +++ b/test/routines/level2/xgerc.hpp @@ -76,6 +76,11 @@ class TestXgerc { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index b2afc6d8..1735e42a 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -76,6 +76,11 @@ class TestXgeru { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index 8bda4d0c..99538bf1 100644 --- a/test/routines/level2/xhbmv.hpp 
+++ b/test/routines/level2/xhbmv.hpp @@ -74,6 +74,11 @@ class TestXhbmv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index 80565d04..3792cb66 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -74,6 +74,11 @@ class TestXhemv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index d71c8009..c58eb189 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -70,6 +70,11 @@ class TestXher { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const 
Arguments<U>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index 083dfa2f..8a7eb0b6 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -74,6 +74,11 @@ class TestXher2 { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index 1dd63562..0862b619 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -74,6 +74,11 @@ class TestXhpmv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto 
queue_plain = queue(); diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index a5c77811..5b454174 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -70,6 +70,11 @@ class TestXhpr { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<U>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index d09178f0..b770da2e 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -74,6 +74,11 @@ class TestXhpr2 { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index 8e0f8321..7a836170 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -74,6 +74,11 @@ class TestXsbmv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static 
Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index 977f733a..352c8cfd 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -74,6 +74,11 @@ class TestXspmv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 93da4b73..988bcdc2 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -70,6 +70,11 @@ class TestXspr { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this 
routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index b835f2b0..ee517bc1 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -74,6 +74,11 @@ class TestXspr2 { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index 0ec96f1d..5eecfb74 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -74,6 +74,11 @@ class TestXsymv { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index b49132e3..ac4ee1ff 100644 --- a/test/routines/level2/xsyr.hpp +++ 
b/test/routines/level2/xsyr.hpp @@ -70,6 +70,11 @@ class TestXsyr { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index 7c65daa2..43644883 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -74,6 +74,11 @@ class TestXsyr2 { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index cf30c2f7..ab9244af 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -69,6 +69,11 @@ class TestXtbmv { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const 
int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index d08e132f..3821e1a4 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -69,6 +69,11 @@ class TestXtpmv { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index cf9a0063..7211c757 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -69,6 +69,11 @@ class TestXtrmv { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level2/xtrsv.hpp 
b/test/routines/level2/xtrsv.hpp index fed4378a..78b9672f 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -29,36 +29,6 @@ namespace clblast { // ================================================================================================= -// Prepares the data -template <typename T> -void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - if (args.a_ld < args.n) { return; } - if (args.a_size <= 0 || args.x_size <= 0) { return; } - - // Copies input buffers to the host - std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); - std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); - buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); - - // Generates 'proper' input for the TRSV routine - // TODO: Improve this, currently loosely based on clBLAS's implementation - for (auto i = size_t{0}; i < args.n; ++i) { - auto diagonal = a_mat_cpu[i*args.a_ld + i + args.a_offset]; - diagonal = AbsoluteValue(diagonal) + static_cast<T>(args.n / size_t{4}); - for (auto j = size_t{0}; j < args.n; ++j) { - a_mat_cpu[j*args.a_ld + i + args.a_offset] /= ConstantTwo<T>(); - } - a_mat_cpu[i*args.a_ld + i + args.a_offset] = diagonal; - x_vec_cpu[i * args.x_inc + args.x_offset] /= ConstantTwo<T>(); - } - - // Copies input buffers back to the OpenCL device - buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); - buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); - return; -} - // See comment at top of file for a description of the class template <typename T> class TestXtrsv { @@ -99,9 +69,28 @@ class TestXtrsv { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T> &args, Queue&, const int, std::vector<T> &x_source, + std::vector<T>&, std::vector<T> &a_source, 
std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) { + if (args.a_ld < args.n) { return; } + if (args.a_size <= 0 || args.x_size <= 0) { return; } + + // Generates 'proper' input for the TRSV routine + // TODO: Improve this, currently loosely based on clBLAS's implementation + for (auto i = size_t{0}; i < args.n; ++i) { + auto diagonal = a_source[i*args.a_ld + i + args.a_offset]; + diagonal = static_cast<T>(AbsoluteValue(diagonal)) + static_cast<T>(args.n / size_t{4}); + for (auto j = size_t{0}; j < args.n; ++j) { + a_source[j*args.a_ld + i + args.a_offset] /= Constant<T>(2.0); + } + a_source[i*args.a_ld + i + args.a_offset] = diagonal; + x_source[i * args.x_inc + args.x_offset] /= Constant<T>(2.0); + } + } + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = Trsv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal, @@ -116,7 +105,6 @@ class TestXtrsv { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrsv<T>(convertToCLBLAS(args.layout), @@ -135,7 +123,6 @@ class TestXtrsv { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index bca3c049..1b12fb1c 100644 --- 
a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -82,6 +82,11 @@ class TestXgemm { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &all) { return all; } + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index 31c7695f..76550b15 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -82,6 +82,11 @@ class TestXhemm { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index ff2bb6cb..5ca3aac6 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -80,6 +80,11 @@ class TestXher2k { static Transposes GetATransposes(const Transposes &) { return {Transpose::kNo, Transpose::kConjugate}; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void 
PrepareData(const Arguments<U>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index 26396fa9..e93d887a 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -73,6 +73,11 @@ class TestXherk { static Transposes GetATransposes(const Transposes &) { return {Transpose::kNo, Transpose::kConjugate}; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<U>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index c84c22b4..9d127e26 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -82,6 +82,11 @@ class TestXsymm { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, 
Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 5c4976e2..d1bdac56 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -80,6 +80,11 @@ class TestXsyr2k { static Transposes GetATransposes(const Transposes &) { return {Transpose::kNo, Transpose::kYes}; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 98c4f6a4..1330924e 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -73,6 +73,11 @@ class TestXsyrk { static Transposes GetATransposes(const Transposes &) { return {Transpose::kNo, Transpose::kYes}; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 55b51e54..7c5bd842 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -73,6 +73,11 @@ class TestXtrmm { static Transposes GetATransposes(const Transposes 
&all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index 1ffaef35..0da4189d 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -19,6 +19,8 @@ #include <vector> #include <string> +#include "test/routines/level3/xtrsm_data.hpp" + #ifdef CLBLAST_REF_CLBLAS #include "test/wrapper_clblas.hpp" #endif @@ -29,38 +31,6 @@ namespace clblast { // ================================================================================================= -// Prepares the data -template <typename T> -void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - const auto k = (args.side == Side::kLeft) ? 
args.m : args.n; - if (args.a_ld < k) { return; } - if (args.a_size <= 0 || args.b_size <= 0) { return; } - - // Copies input buffers to the host - std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); - std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); - buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); - buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); - - // Generates 'proper' input for the TRSM routine - // TODO: Improve this - for (auto i = size_t{0}; i < k; ++i) { - for (auto j = size_t{0}; j < k; ++j) { - auto value = a_mat_cpu[j*args.a_ld + i + args.a_offset]; - value *= ConstantTwo<T>(); - if (IsCloseToZero(value)) { value += ConstantOne<T>(); } - a_mat_cpu[j*args.a_ld + i + args.a_offset] = value; - } - } - - // Copies input buffers back to the OpenCL device - buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); - buffers.b_mat.Write(queue, args.b_size, b_mat_cpu); - return; -} - -// ================================================================================================= - // See comment at top of file for a description of the class template <typename T> class TestXtrsm { @@ -105,9 +75,23 @@ class TestXtrsm { static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T> &args, Queue &queue, const int seed, + std::vector<T>&, std::vector<T>&, + std::vector<T>& a_source_, std::vector<T>& b_source_, std::vector<T>&, + std::vector<T>&, std::vector<T>&) { + const auto k = (args.side == Side::kLeft) ? args.m : args.n; + const auto b_one = (args.layout == Layout::kRowMajor) ? 
args.n : args.m; + if (args.a_ld < k) { return; } + if (args.b_ld < b_one) { return; } + if (args.a_size <= 0 || args.b_size <= 0) { return; } + + // TODO: This is a copy of the clBLAS random matrix generation, make it work properly + GenerateProperTrsmMatrices(args, seed, &a_source_[args.a_offset], &b_source_[args.b_offset]); + } + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, @@ -122,7 +106,6 @@ class TestXtrsm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrsm(convertToCLBLAS(args.layout), @@ -142,7 +125,6 @@ class TestXtrsm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - PrepareData(args, buffers, queue); std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); diff --git a/test/routines/level3/xtrsm_data.hpp b/test/routines/level3/xtrsm_data.hpp new file mode 100644 index 00000000..21083fe9 --- /dev/null +++ b/test/routines/level3/xtrsm_data.hpp @@ -0,0 +1,187 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements data-preparation routines for proper input for the TRSM routine. Note: The +// data-preparation routines are taken from clBLAS +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XTRSM_DATA_H_ +#define CLBLAST_TEST_ROUTINES_XTRSM_DATA_H_ + +#include <vector> +#include <string> + +#include "utilities/utilities.hpp" + +namespace clblast { +// ================================================================================================= + +// Limits to prepare proper input data ('inline': full specializations defined in a header) +template <typename T> double TrsmLimitMatA(); +template <> inline double TrsmLimitMatA<float>() { return pow(2.0, 7); } +template <> inline double TrsmLimitMatA<double>() { return pow(2.0, 5); } +template <> inline double TrsmLimitMatA<float2>() { return TrsmLimitMatA<float>(); } +template <> inline double TrsmLimitMatA<double2>() { return TrsmLimitMatA<double>(); } +template <typename T> double TrsmLimitMatB(); +template <> inline double TrsmLimitMatB<float>() { return pow(2.0, 16); } +template <> inline double TrsmLimitMatB<double>() { return pow(2.0, 47); } +template <> inline double TrsmLimitMatB<float2>() { return TrsmLimitMatB<float>(); } +template <> inline double TrsmLimitMatB<double2>() { return TrsmLimitMatB<double>(); } + +// Matrix element setter: writes 'value' at (row, column) using leading dimension 'ld' +template <typename T> +void SetElement(const clblast::Layout layout, + const size_t row, const size_t column, T *mat, const size_t ld, const T value) +{ + if (layout == clblast::Layout::kRowMajor) { mat[column + ld * row] = value; } + else { mat[row + ld * column] = value; } +} + +// Matrix element getter +template <typename T> +T GetElement(const clblast::Layout layout, + const size_t row, const size_t column, const T *mat, const size_t ld) +{ + if (layout ==
clblast::Layout::kRowMajor) { return mat[column + ld * row]; } + else { return mat[row + ld * column]; } +} + +// Maps a random value in [-1, +1] to a magnitude between 'left' and 'right', keeping its sign. +template<typename T> +T BoundRandom(const double rand_val, const double left, const double right) +{ + const auto value = Constant<T>(rand_val * (right - left)); + if (rand_val < 0.0) { // sign test on the input: an absolute value can never be negative + return value - Constant<T>(left); + } + else { + return value + Constant<T>(left); + } +} + +// The clBLAS function to generate proper input matrices for matrices A & B. Note that this routine +// should remain deterministic: random values are therefore drawn from a Mersenne Twister seeded +// with 'seed', uniformly distributed between -1 and +1. +template <typename T> +void GenerateProperTrsmMatrices(const Arguments<T> &args, const int seed, T *mat_a, T *mat_b) +{ + // Random number generator + std::mt19937 mt(seed); + std::uniform_real_distribution<double> dist(-1.0, 1.0); + + const auto k = (args.side == Side::kLeft) ? args.m : args.n; + + // Determines: max(|a_{ii}|) and min(|a_{ii}|) + // Generates: a_{ii} which are constrained by min/max + auto min = ConstantZero<T>(); + if (args.diagonal == clblast::Diagonal::kUnit) { + for (auto i = size_t{0}; i < k; ++i) { + SetElement<T>(args.layout, i, i, mat_a, args.a_ld, ConstantOne<T>()); // must not be accessed + } + } + else { + auto max = Constant<T>(dist(mt) * TrsmLimitMatA<T>()); + if (AbsoluteValue(max) < 1.0) { max += Constant<T>(3.0); } // no zeros on the diagonal + min = max / Constant<T>(100.0); + SetElement<T>(args.layout, 0, 0, mat_a, args.a_ld, max); + for (auto i = size_t{1}; i < k; ++i) { + auto value = BoundRandom<T>(dist(mt), AbsoluteValue(min), AbsoluteValue(max)); + if (AbsoluteValue(value) == 0) { + value = max; + } + SetElement<T>(args.layout, i, i, mat_a, args.a_ld, value); + } + } + + // Generates a_{ij} for all j <> i.
+ for (auto i = size_t{0}; i < k; ++i) { + auto sum = (args.diagonal == clblast::Diagonal::kUnit) ? + AbsoluteValue(ConstantOne<T>()) : + AbsoluteValue(GetElement<T>(args.layout, i, i, mat_a, args.a_ld)); + for (auto j = size_t{0}; j < k; ++j) { + if (j == i) { continue; } + auto value = ConstantZero<T>(); + if (((args.triangle == clblast::Triangle::kUpper) && (j > i)) || + ((args.triangle == clblast::Triangle::kLower) && (j < i))) { + if (sum >= 1.0) { + const auto limit = sum / std::sqrt(static_cast<double>(k) - static_cast<double>(j)); + value = Constant<T>(dist(mt) * limit); + sum -= AbsoluteValue(value); + } + } + SetElement<T>(args.layout, i, j, mat_a, args.a_ld, value); + } + } + + // Generate matrix B + if (args.side == clblast::Side::kLeft) { + for (auto j = size_t{0}; j < args.n; ++j) { + auto sum = TrsmLimitMatB<T>(); + for (auto i = size_t{0}; i < args.m; ++i) { + const auto a_value = GetElement<T>(args.layout, i, i, mat_a, args.a_ld); + auto value = ConstantZero<T>(); + if (sum >= 0.0) { + const auto limit = sum * AbsoluteValue(a_value) / std::sqrt(static_cast<double>(args.m) - static_cast<double>(i)); + value = Constant<T>(dist(mt) * limit); + sum -= AbsoluteValue(value) / AbsoluteValue(a_value); + } + SetElement<T>(args.layout, i, j, mat_b, args.b_ld, value); + if ((i == 0 && j == 0) || (AbsoluteValue(value) < AbsoluteValue(min))) { + min = value; + } + } + } + } + else { + for (auto i = size_t{0}; i < args.m; ++i) { + auto sum = TrsmLimitMatB<T>(); + for (auto j = size_t{0}; j < args.n; ++j) { + const auto a_value = GetElement<T>(args.layout, j, j, mat_a, args.a_ld); + auto value = ConstantZero<T>(); + if (sum >= 0.0) { + const auto limit = sum * AbsoluteValue(a_value) / std::sqrt(static_cast<double>(args.n) - static_cast<double>(j)); + value = Constant<T>(dist(mt) * limit); + sum -= AbsoluteValue(value) / AbsoluteValue(a_value); + } + SetElement<T>(args.layout, i, j, mat_b, args.b_ld, value); + if ((i == 0 && j == 0) || (AbsoluteValue(value) < 
AbsoluteValue(min))) { + min = value; + } + } + } + } + if (args.diagonal == clblast::Diagonal::kUnit) { + for (auto i = size_t{0}; i < k; ++i) { + SetElement<T>(args.layout, i, i, mat_a, args.a_ld, ConstantOne<T>()); // must not be accessed + } + } + + // Calculate a proper alpha + if (AbsoluteValue(min) > AbsoluteValue(args.alpha)) { + // Not implemented + } + + // Adjust matrix B according to the value of alpha + if (AbsoluteValue(args.alpha) != 1.0 && AbsoluteValue(args.alpha) != 0.0) { + for (auto i = size_t{0}; i < args.m; ++i) { + for (auto j = size_t{0}; j < args.n; ++j) { + auto value = GetElement<T>(args.layout, i, j, mat_b, args.b_ld); + value /= args.alpha; + SetElement<T>(args.layout, i, j, mat_b, args.b_ld, value); + } + } + } +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XTRSM_DATA_H_ +#endif diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index c6ce4b07..05bea9aa 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -167,6 +167,11 @@ class TestXinvert { static Transposes GetATransposes(const Transposes &) { return {}; } // N/A for this routine static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { try { diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp index f0739c6a..d1064d0c 100644 --- a/test/routines/levelx/xomatcopy.hpp +++ b/test/routines/levelx/xomatcopy.hpp @@ -127,6 +127,11 @@ class TestXomatcopy { 
static Transposes GetATransposes(const Transposes &all) { return all; } static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + // Describes how to prepare the input data + static void PrepareData(const Arguments<T>&, Queue&, const int, std::vector<T>&, + std::vector<T>&, std::vector<T>&, std::vector<T>&, std::vector<T>&, + std::vector<T>&, std::vector<T>&) {} // N/A for this routine + // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto queue_plain = queue(); |