summary | refs | log | tree | commit | diff
path: root/src/routines/level2/xtrsv.cpp
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-08-13 22:57:10 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-08-13 22:58:01 +0200
commit: 3115c15db5a3b8fae0e9788cbcadaba91920af3c (patch)
tree: fde0cc3016722628876710edac05c92ab7ecae1b /src/routines/level2/xtrsv.cpp
parent: dd1fa7cc8184223900292939dab2518e6a28a6db (diff)
Small refactoring of events in TRSV substitution routine
Diffstat (limited to 'src/routines/level2/xtrsv.cpp')
-rw-r--r-- src/routines/level2/xtrsv.cpp 11
1 file changed, 6 insertions, 5 deletions
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index 76401753..a87c5410 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -33,7 +33,8 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
const size_t n,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_inc,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ EventPointer event) {
if (n > db_["TRSV_BLOCK_SIZE"]) { throw BLASError(StatusCode::kUnexpectedError); };
@@ -69,9 +70,7 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
// Launches the kernel
const auto local = std::vector<size_t>{db_["TRSV_BLOCK_SIZE"]};
const auto global = std::vector<size_t>{Ceil(n, db_["TRSV_BLOCK_SIZE"])};
- auto event = Event();
- RunKernel(kernel, queue_, device_, global, local, event.pointer());
- event.WaitForCompletion();
+ RunKernel(kernel, queue_, device_, global, local, event);
}
// =================================================================================================
@@ -146,10 +145,12 @@ void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
}
// Runs the triangular substitution for the block size
+ auto sub_event = Event();
Substitution(layout, triangle, a_transpose, diagonal, block_size,
a_buffer, a_offset + col + col*a_ld, a_ld,
b_buffer, b_offset + col*b_inc, b_inc,
- x_buffer, x_offset + col*x_inc, x_inc);
+ x_buffer, x_offset + col*x_inc, x_inc, sub_event.pointer());
+ sub_event.WaitForCompletion();
}
// Retrieves the results