summaryrefslogtreecommitdiff
path: root/test/routines/levelx/xinvert.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'test/routines/levelx/xinvert.hpp')
-rw-r--r--test/routines/levelx/xinvert.hpp33
1 files changed, 25 insertions, 8 deletions
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index cc02a88b..b8503029 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -17,6 +17,7 @@
#define CLBLAST_TEST_ROUTINES_XINVERT_H_
#include "test/routines/common.hpp"
+#include "src/routines/levelx/xinvert.hpp"
namespace clblast {
// =================================================================================================
@@ -40,6 +41,13 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
return StatusCode::kUnknownError;
}
+ // Start at zero
+ for (size_t i =0; i < args.m; ++i) {
+ for (size_t j = 0; j < args.n; ++j) {
+ buffers_host.b_mat[j * args.m + i] = T{0.0};
+ }
+ }
+
// Loops over the amount of diagonal blocks of size args.m by args.m each
for (auto block_id = size_t{0}; block_id < num_blocks; ++block_id) {
const auto a_offset = block_id * (block_size + a_ld * block_size) + args.a_offset;
@@ -164,14 +172,23 @@ class TestXinvert {
// Describes how to run the CLBlast routine
static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
try {
- auto event = cl_event{};
- auto inverter = Xinvert<T>(queue, &event);
- inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal,
- args.n, args.m,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat);
- clWaitForEvents(1, &event);
- clReleaseEvent(event);
+ #ifdef OPENCL_API
+ auto event = cl_event{};
+ auto inverter = Xinvert<T>(queue, &event);
+ inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal,
+ args.n, args.m,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat);
+ clWaitForEvents(1, &event);
+ clReleaseEvent(event);
+ #elif CUDA_API
+ auto inverter = Xinvert<T>(queue, nullptr);
+ inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal,
+ args.n, args.m,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat);
+ cuStreamSynchronize(queue());
+ #endif
} catch (...) { return DispatchException(); }
return StatusCode::kSuccess;
}