diff options
-rw-r--r-- | src/kernels/level3/invert_diagonal_blocks.opencl | 4 | ||||
-rw-r--r-- | src/utilities/utilities.cpp | 118 | ||||
-rw-r--r-- | src/utilities/utilities.hpp | 26 | ||||
-rw-r--r-- | test/routines/level2/xtrsv.hpp | 4 | ||||
-rw-r--r-- | test/routines/level3/xtrsm.hpp | 34 | ||||
-rw-r--r-- | test/routines/levelx/xinvert.hpp | 7 |
6 files changed, 95 insertions, 98 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl index e94b4d30..d43b9b7c 100644 --- a/src/kernels/level3/invert_diagonal_blocks.opencl +++ b/src/kernels/level3/invert_diagonal_blocks.opencl @@ -100,7 +100,9 @@ void InvertDiagonalBlock(int n, __global const real* restrict src, const int src if (unit_diagonal == 0) { const real diagonal_value = lm[thread_index][thread_index]; if (!IsZero(diagonal_value)) { // Only for non-singular values and values inside the matrix - DivideReal(inverted_diagonal, inverted_diagonal, diagonal_value); + real constant_one; + SetToOne(constant_one); + DivideReal(inverted_diagonal, constant_one, diagonal_value); } } lm[thread_index][thread_index] = inverted_diagonal; diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp index b2ed2f0c..9cf75490 100644 --- a/src/utilities/utilities.cpp +++ b/src/utilities/utilities.cpp @@ -24,100 +24,52 @@ namespace clblast { // ================================================================================================= // Returns a scalar with a default value -template <typename T> -T GetScalar() { - return static_cast<T>(2.0); -} +template <typename T> T GetScalar() { return static_cast<T>(2.0); } template float GetScalar<float>(); template double GetScalar<double>(); - -// Specialized version of the above for half-precision -template <> -half GetScalar() { - return FloatToHalf(2.0f); -} - -// Specialized versions of the above for complex data-types -template <> -float2 GetScalar() { - return {2.0f, 0.5f}; -} -template <> -double2 GetScalar() { - return {2.0, 0.5}; -} +template <> half GetScalar() { return FloatToHalf(2.0f); } +template <> float2 GetScalar() { return {2.0f, 0.5f}; } +template <> double2 GetScalar() { return {2.0, 0.5}; } // Returns a scalar of value 0 -template <typename T> -T ConstantZero() { - return static_cast<T>(0.0); -} +template <typename T> T ConstantZero() { return static_cast<T>(0.0); } template float ConstantZero<float>(); template double ConstantZero<double>(); - -// Specialized version of the above for half-precision -template <> -half ConstantZero() { - return FloatToHalf(0.0f); -} - -// Specialized versions of the above for complex data-types -template <> -float2 ConstantZero() { - return {0.0f, 0.0f}; -} -template <> -double2 ConstantZero() { - return {0.0, 0.0}; -} +template <> half ConstantZero() { return FloatToHalf(0.0f); } +template <> float2 ConstantZero() { return {0.0f, 0.0f}; } +template <> double2 ConstantZero() { return {0.0, 0.0}; } // Returns a scalar of value 1 -template <typename T> -T ConstantOne() { - return static_cast<T>(1.0); -} +template <typename T> T ConstantOne() { return static_cast<T>(1.0); } template float ConstantOne<float>(); template double ConstantOne<double>(); - -// Specialized version of the above for half-precision -template <> -half ConstantOne() { - return FloatToHalf(1.0f); -} - -// Specialized versions of the above for complex data-types -template <> -float2 ConstantOne() { - return {1.0f, 0.0f}; -} -template <> -double2 ConstantOne() { - return {1.0, 0.0}; -} +template <> half ConstantOne() { return FloatToHalf(1.0f); } +template <> float2 ConstantOne() { return {1.0f, 0.0f}; } +template <> double2 ConstantOne() { return {1.0, 0.0}; } // Returns a scalar of value -1 -template <typename T> -T ConstantNegOne() { - return static_cast<T>(-1.0); -} +template <typename T> T ConstantNegOne() { return static_cast<T>(-1.0); } template float ConstantNegOne<float>(); template double ConstantNegOne<double>(); +template <> half ConstantNegOne() { return FloatToHalf(-1.0f); } +template <> float2 ConstantNegOne() { return {-1.0f, 0.0f}; } +template <> double2 ConstantNegOne() { return {-1.0, 0.0}; } -// Specialized version of the above for half-precision -template <> -half ConstantNegOne() { - return FloatToHalf(-1.0f); -} - -// Specialized versions of the above for complex data-types -template <> -float2 ConstantNegOne() { - return {-1.0f, 0.0f}; -} -template <> -double2 ConstantNegOne() { - return {-1.0, 0.0}; -} +// Returns a scalar of value 1 +template <typename T> T ConstantTwo() { return static_cast<T>(2.0); } +template float ConstantTwo<float>(); +template double ConstantTwo<double>(); +template <> half ConstantTwo() { return FloatToHalf(2.0f); } +template <> float2 ConstantTwo() { return {2.0f, 0.0f}; } +template <> double2 ConstantTwo() { return {2.0, 0.0}; } + +// Returns a small scalar value just larger than 0 +template <typename T> T SmallConstant() { return static_cast<T>(1e7); } +template float SmallConstant<float>(); +template double SmallConstant<double>(); +template <> half SmallConstant() { return FloatToHalf(1e7); } +template <> float2 SmallConstant() { return {1e7, 0.0f}; } +template <> double2 SmallConstant() { return {1e7, 0.0}; } // Returns the absolute value of a scalar template <typename T> T AbsoluteValue(const T value) { return std::fabs(value); } @@ -127,6 +79,14 @@ template <> half AbsoluteValue(const half value) { return FloatToHalf(std::fabs( template <> float2 AbsoluteValue(const float2 value) { return std::abs(value); } template <> double2 AbsoluteValue(const double2 value) { return std::abs(value); } +// Returns whether a scalar is close to zero +template <typename T> bool IsCloseToZero(const T value) { return (value > -SmallConstant<T>()) && (value < SmallConstant<T>()); } +template bool IsCloseToZero<float>(const float); +template bool IsCloseToZero<double>(const double); +template <> bool IsCloseToZero(const half value) { return IsCloseToZero(HalfToFloat(value)); } +template <> bool IsCloseToZero(const float2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); } +template <> bool IsCloseToZero(const double2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); } + // ================================================================================================= // Implements the string conversion using std::to_string if possible diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 2c13658b..044955ea 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -99,24 +99,20 @@ constexpr auto kArgNoAbbreviations = "no_abbrv"; // ================================================================================================= // Returns a scalar with a default value -template <typename T> -T GetScalar(); - -// Returns a scalar of value 0 -template <typename T> -T ConstantZero(); - -// Returns a scalar of value 1 -template <typename T> -T ConstantOne(); +template <typename T> T GetScalar(); -// Returns a scalar of value -1 -template <typename T> -T ConstantNegOne(); +// Fixed value scalars +template <typename T> T ConstantZero(); +template <typename T> T ConstantOne(); +template <typename T> T ConstantNegOne(); +template <typename T> T ConstantTwo(); +template <typename T> T SmallConstant(); // Returns the absolute value of a scalar -template <typename T> -T AbsoluteValue(const T value); +template <typename T> T AbsoluteValue(const T value); + +// Returns whether a scalar is close to zero +template <typename T> bool IsCloseToZero(const T value); // ================================================================================================= diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 811feac5..72ebdf9e 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -46,10 +46,10 @@ void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto diagonal = a_mat_cpu[i*args.a_ld + i + args.a_offset]; diagonal = AbsoluteValue(diagonal) + static_cast<T>(args.n / size_t{4}); for (auto j = size_t{0}; j < args.n; ++j) { - a_mat_cpu[j*args.a_ld + i + args.a_offset] /= T{2.0}; + a_mat_cpu[j*args.a_ld + i + args.a_offset] /= ConstantTwo<T>(); } a_mat_cpu[i*args.a_ld + i + args.a_offset] = diagonal; - x_vec_cpu[i * args.x_inc + args.x_offset] /= T{2.0}; + x_vec_cpu[i * args.x_inc + args.x_offset] /= ConstantTwo<T>(); } // Copies input buffers back to the OpenCL device diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index e59c96ff..246cb930 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -29,6 +29,37 @@ namespace clblast { // ================================================================================================= +// Prepares the data +template <typename T> +void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + const auto k = (args.side == Side::kLeft) ? args.m : args.n; + if (args.a_ld < k) { return; } + + // Copies input buffers to the host + std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); + std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + + // Generates 'proper' input for the TRSM routine + // TODO: Improve this + for (auto i = size_t{0}; i < k; ++i) { + for (auto j = size_t{0}; j < k; ++j) { + auto value = a_mat_cpu[j*args.a_ld + i + args.a_offset]; + value *= ConstantTwo<T>(); + if (IsCloseToZero(value)) { value += ConstantOne<T>(); } + a_mat_cpu[j*args.a_ld + i + args.a_offset] = value; + } + } + + // Copies input buffers back to the OpenCL device + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Write(queue, args.b_size, b_mat_cpu); + return; +} + +// ================================================================================================= + // See comment at top of file for a description of the class template <typename T> class TestXtrsm { @@ -75,6 +106,7 @@ class TestXtrsm { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, @@ -89,6 +121,7 @@ class TestXtrsm { // Describes how to run the clBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CLBLAS static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + PrepareData(args, buffers, queue); auto queue_plain = queue(); auto event = cl_event{}; auto status = clblasXtrsm(convertToCLBLAS(args.layout), @@ -108,6 +141,7 @@ class TestXtrsm { // Describes how to run the CPU BLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CBLAS static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + PrepareData(args, buffers, queue); std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0)); buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index 4408e8d5..c6ce4b07 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -41,9 +41,14 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu const auto num_blocks = CeilDiv(args.n, block_size); const auto a_ld = args.a_ld; const auto b_ld = block_size; - if ((block_size == 0) || (args.n == 0) || (block_size > args.n)) { + + // Checks for valid arguments + if ((block_size == 0) || (args.n == 0)) { return StatusCode::kInvalidDimension; } + if ((block_size % 16 != 0) || (block_size > 128)) { + return StatusCode::kUnknownError; + } // Loops over the amount of diagonal blocks of size args.m by args.m each for (auto block_id = size_t{0}; block_id < num_blocks; ++block_id) { |