diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-27 22:01:15 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-27 22:01:15 +0200 |
commit | 44f7fa628a42d1e928c7c1c3a1bfca5de37de276 (patch) | |
tree | fc7119a2810d68d08e3809ee92d9823f3212bf6a | |
parent | 8579b2b494cff3ec2c5b33ba3cbbc3500fab3fc4 (diff) |
Added GEMV synchronisation for the TRSV routine: similar bug as in TRSM
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | src/routines/level2/xtrsv.cpp | 11 |
2 files changed, 8 insertions, 5 deletions
```diff
@@ -1,6 +1,6 @@
 
 Development (next version)
-- Fixed a bug in the TRSM routine due to missing synchronisations after GEMM calls
+- Fixed a bug in the TRSM/TRSV routines due to missing synchronisations after GEMM/GEMV calls
 - Added a CUDA API to CLBlast:
   * The library and kernels can be compiled with the CUDA driver API and NVRTC (requires CUDA 7.5)
   * Two CUDA API sample programs are added: SGEMM and DAXPY
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index d5d009ff..36c33a76 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -131,10 +131,13 @@ void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
     if (i > 0) {
       const auto gemv_m = (a_transpose == Transpose::kNo) ? block_size : i;
       const auto gemv_n = (a_transpose == Transpose::kNo) ? i : block_size;
-      DoGemv(layout, a_transpose, gemv_m, gemv_n, ConstantOne<T>(),
-             a_buffer, a_offset + extra_offset_a, a_ld,
-             x_buffer, x_offset + extra_offset_x, x_inc, ConstantOne<T>(),
-             x_buffer, x_offset + extra_offset_b, x_inc );
+      auto gemv_event = Event();
+      auto gemv = Xgemv<T>(queue_, gemv_event.pointer());
+      gemv.DoGemv(layout, a_transpose, gemv_m, gemv_n, ConstantOne<T>(),
+                  a_buffer, a_offset + extra_offset_a, a_ld,
+                  x_buffer, x_offset + extra_offset_x, x_inc, ConstantOne<T>(),
+                  x_buffer, x_offset + extra_offset_b, x_inc);
+      gemv_event.WaitForCompletion();
     }
 
     // Runs the triangular substitution for the block size
```