summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/kernels/level3/invert_diagonal_blocks.opencl 4
-rw-r--r-- src/utilities/utilities.cpp 118
-rw-r--r-- src/utilities/utilities.hpp 26
-rw-r--r-- test/routines/level2/xtrsv.hpp 4
-rw-r--r-- test/routines/level3/xtrsm.hpp 34
-rw-r--r-- test/routines/levelx/xinvert.hpp 7
6 files changed, 95 insertions, 98 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl
index e94b4d30..d43b9b7c 100644
--- a/src/kernels/level3/invert_diagonal_blocks.opencl
+++ b/src/kernels/level3/invert_diagonal_blocks.opencl
@@ -100,7 +100,9 @@ void InvertDiagonalBlock(int n, __global const real* restrict src, const int src
if (unit_diagonal == 0) {
const real diagonal_value = lm[thread_index][thread_index];
if (!IsZero(diagonal_value)) { // Only for non-singular values and values inside the matrix
- DivideReal(inverted_diagonal, inverted_diagonal, diagonal_value);
+ real constant_one;
+ SetToOne(constant_one);
+ DivideReal(inverted_diagonal, constant_one, diagonal_value);
}
}
lm[thread_index][thread_index] = inverted_diagonal;
diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp
index b2ed2f0c..9cf75490 100644
--- a/src/utilities/utilities.cpp
+++ b/src/utilities/utilities.cpp
@@ -24,100 +24,52 @@ namespace clblast {
// =================================================================================================
// Returns a scalar with a default value
-template <typename T>
-T GetScalar() {
- return static_cast<T>(2.0);
-}
+template <typename T> T GetScalar() { return static_cast<T>(2.0); }
template float GetScalar<float>();
template double GetScalar<double>();
-
-// Specialized version of the above for half-precision
-template <>
-half GetScalar() {
- return FloatToHalf(2.0f);
-}
-
-// Specialized versions of the above for complex data-types
-template <>
-float2 GetScalar() {
- return {2.0f, 0.5f};
-}
-template <>
-double2 GetScalar() {
- return {2.0, 0.5};
-}
+template <> half GetScalar() { return FloatToHalf(2.0f); }
+template <> float2 GetScalar() { return {2.0f, 0.5f}; }
+template <> double2 GetScalar() { return {2.0, 0.5}; }
// Returns a scalar of value 0
-template <typename T>
-T ConstantZero() {
- return static_cast<T>(0.0);
-}
+template <typename T> T ConstantZero() { return static_cast<T>(0.0); }
template float ConstantZero<float>();
template double ConstantZero<double>();
-
-// Specialized version of the above for half-precision
-template <>
-half ConstantZero() {
- return FloatToHalf(0.0f);
-}
-
-// Specialized versions of the above for complex data-types
-template <>
-float2 ConstantZero() {
- return {0.0f, 0.0f};
-}
-template <>
-double2 ConstantZero() {
- return {0.0, 0.0};
-}
+template <> half ConstantZero() { return FloatToHalf(0.0f); }
+template <> float2 ConstantZero() { return {0.0f, 0.0f}; }
+template <> double2 ConstantZero() { return {0.0, 0.0}; }
// Returns a scalar of value 1
-template <typename T>
-T ConstantOne() {
- return static_cast<T>(1.0);
-}
+template <typename T> T ConstantOne() { return static_cast<T>(1.0); }
template float ConstantOne<float>();
template double ConstantOne<double>();
-
-// Specialized version of the above for half-precision
-template <>
-half ConstantOne() {
- return FloatToHalf(1.0f);
-}
-
-// Specialized versions of the above for complex data-types
-template <>
-float2 ConstantOne() {
- return {1.0f, 0.0f};
-}
-template <>
-double2 ConstantOne() {
- return {1.0, 0.0};
-}
+template <> half ConstantOne() { return FloatToHalf(1.0f); }
+template <> float2 ConstantOne() { return {1.0f, 0.0f}; }
+template <> double2 ConstantOne() { return {1.0, 0.0}; }
// Returns a scalar of value -1
-template <typename T>
-T ConstantNegOne() {
- return static_cast<T>(-1.0);
-}
+template <typename T> T ConstantNegOne() { return static_cast<T>(-1.0); }
template float ConstantNegOne<float>();
template double ConstantNegOne<double>();
+template <> half ConstantNegOne() { return FloatToHalf(-1.0f); }
+template <> float2 ConstantNegOne() { return {-1.0f, 0.0f}; }
+template <> double2 ConstantNegOne() { return {-1.0, 0.0}; }
-// Specialized version of the above for half-precision
-template <>
-half ConstantNegOne() {
- return FloatToHalf(-1.0f);
-}
-
-// Specialized versions of the above for complex data-types
-template <>
-float2 ConstantNegOne() {
- return {-1.0f, 0.0f};
-}
-template <>
-double2 ConstantNegOne() {
- return {-1.0, 0.0};
-}
+// Returns a scalar of value 2
+template <typename T> T ConstantTwo() { return static_cast<T>(2.0); }
+template float ConstantTwo<float>();
+template double ConstantTwo<double>();
+template <> half ConstantTwo() { return FloatToHalf(2.0f); }
+template <> float2 ConstantTwo() { return {2.0f, 0.0f}; }
+template <> double2 ConstantTwo() { return {2.0, 0.0}; }
+
+// Returns a small positive scalar (1e-7), used as the threshold by IsCloseToZero
+template <typename T> T SmallConstant() { return static_cast<T>(1e-7); }
+template float SmallConstant<float>();
+template double SmallConstant<double>();
+template <> half SmallConstant() { return FloatToHalf(1e-7f); }
+template <> float2 SmallConstant() { return {1e-7f, 0.0f}; }
+template <> double2 SmallConstant() { return {1e-7, 0.0}; }
// Returns the absolute value of a scalar
template <typename T> T AbsoluteValue(const T value) { return std::fabs(value); }
@@ -127,6 +79,14 @@ template <> half AbsoluteValue(const half value) { return FloatToHalf(std::fabs(
template <> float2 AbsoluteValue(const float2 value) { return std::abs(value); }
template <> double2 AbsoluteValue(const double2 value) { return std::abs(value); }
+// Returns whether a scalar is close to zero
+template <typename T> bool IsCloseToZero(const T value) { return (value > -SmallConstant<T>()) && (value < SmallConstant<T>()); }
+template bool IsCloseToZero<float>(const float);
+template bool IsCloseToZero<double>(const double);
+template <> bool IsCloseToZero(const half value) { return IsCloseToZero(HalfToFloat(value)); }
+template <> bool IsCloseToZero(const float2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); }
+template <> bool IsCloseToZero(const double2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); }
+
// =================================================================================================
// Implements the string conversion using std::to_string if possible
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 2c13658b..044955ea 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -99,24 +99,20 @@ constexpr auto kArgNoAbbreviations = "no_abbrv";
// =================================================================================================
// Returns a scalar with a default value
-template <typename T>
-T GetScalar();
-
-// Returns a scalar of value 0
-template <typename T>
-T ConstantZero();
-
-// Returns a scalar of value 1
-template <typename T>
-T ConstantOne();
+template <typename T> T GetScalar();
-// Returns a scalar of value -1
-template <typename T>
-T ConstantNegOne();
+// Fixed value scalars
+template <typename T> T ConstantZero();
+template <typename T> T ConstantOne();
+template <typename T> T ConstantNegOne();
+template <typename T> T ConstantTwo();
+template <typename T> T SmallConstant();
// Returns the absolute value of a scalar
-template <typename T>
-T AbsoluteValue(const T value);
+template <typename T> T AbsoluteValue(const T value);
+
+// Returns whether a scalar is close to zero
+template <typename T> bool IsCloseToZero(const T value);
// =================================================================================================
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index 811feac5..72ebdf9e 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -46,10 +46,10 @@ void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto diagonal = a_mat_cpu[i*args.a_ld + i + args.a_offset];
diagonal = AbsoluteValue(diagonal) + static_cast<T>(args.n / size_t{4});
for (auto j = size_t{0}; j < args.n; ++j) {
- a_mat_cpu[j*args.a_ld + i + args.a_offset] /= T{2.0};
+ a_mat_cpu[j*args.a_ld + i + args.a_offset] /= ConstantTwo<T>();
}
a_mat_cpu[i*args.a_ld + i + args.a_offset] = diagonal;
- x_vec_cpu[i * args.x_inc + args.x_offset] /= T{2.0};
+ x_vec_cpu[i * args.x_inc + args.x_offset] /= ConstantTwo<T>();
}
// Copies input buffers back to the OpenCL device
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index e59c96ff..246cb930 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -29,6 +29,37 @@
namespace clblast {
// =================================================================================================
+// Prepares the data
+template <typename T>
+void PrepareData(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ const auto k = (args.side == Side::kLeft) ? args.m : args.n;
+ if (args.a_ld < k) { return; }
+
+ // Copies input buffers to the host
+ std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
+ std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
+ buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+
+ // Generates 'proper' input for the TRSM routine
+ // TODO: Improve this
+ for (auto i = size_t{0}; i < k; ++i) {
+ for (auto j = size_t{0}; j < k; ++j) {
+ auto value = a_mat_cpu[j*args.a_ld + i + args.a_offset];
+ value *= ConstantTwo<T>();
+ if (IsCloseToZero(value)) { value += ConstantOne<T>(); }
+ a_mat_cpu[j*args.a_ld + i + args.a_offset] = value;
+ }
+ }
+
+ // Copies input buffers back to the OpenCL device
+ buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ return;
+}
+
+// =================================================================================================
+
// See comment at top of file for a description of the class
template <typename T>
class TestXtrsm {
@@ -75,6 +106,7 @@ class TestXtrsm {
// Describes how to run the CLBlast routine
static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ PrepareData(args, buffers, queue);
auto queue_plain = queue();
auto event = cl_event{};
auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal,
@@ -89,6 +121,7 @@ class TestXtrsm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ PrepareData(args, buffers, queue);
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtrsm(convertToCLBLAS(args.layout),
@@ -108,6 +141,7 @@ class TestXtrsm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ PrepareData(args, buffers, queue);
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index 4408e8d5..c6ce4b07 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -41,9 +41,14 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
const auto num_blocks = CeilDiv(args.n, block_size);
const auto a_ld = args.a_ld;
const auto b_ld = block_size;
- if ((block_size == 0) || (args.n == 0) || (block_size > args.n)) {
+
+ // Checks for valid arguments
+ if ((block_size == 0) || (args.n == 0)) {
return StatusCode::kInvalidDimension;
}
+ if ((block_size % 16 != 0) || (block_size > 128)) {
+ return StatusCode::kUnknownError;
+ }
// Loops over the amount of diagonal blocks of size args.m by args.m each
for (auto block_id = size_t{0}; block_id < num_blocks; ++block_id) {