diff options
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | src/routines/level3/xtrsm.cpp | 8 |
2 files changed, 5 insertions, 4 deletions
@@ -1,6 +1,7 @@
 
 Development (next version)
 - Fixed a bug in the TRSM/TRSV routines due to missing synchronisations after GEMM/GEMV calls
+- Fixed a bug in TRSM when using the a-offset argument
 - Added a CUDA API to CLBlast:
   * The library and kernels can be compiled with the CUDA driver API and NVRTC (requires CUDA 7.5)
   * Two CUDA API sample programs are added: SGEMM and DAXPY
diff --git a/src/routines/level3/xtrsm.cpp b/src/routines/level3/xtrsm.cpp
index 119bf25d..d622e3bf 100644
--- a/src/routines/level3/xtrsm.cpp
+++ b/src/routines/level3/xtrsm.cpp
@@ -143,7 +143,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
         auto gemm2 = Xgemm<T>(queue_, gemm2_event.pointer());
         gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
                      m - i - block_size, n, block_size, ConstantNegOne<T>(),
-                     a_buffer, this_a_offset, a_ld,
+                     a_buffer, this_a_offset + a_offset, a_ld,
                      x_buffer, x_offset + i, x_ld, gemm_alpha,
                      b_buffer, b_offset + i + block_size, b_ld);
         gemm2_event.WaitForCompletion();
@@ -172,7 +172,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
         auto gemm2 = Xgemm<T>(queue_, gemm2_event.pointer());
         gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
                      i, n, current_block_size, ConstantNegOne<T>(),
-                     a_buffer, this_a_offset, a_ld,
+                     a_buffer, this_a_offset + a_offset, a_ld,
                      x_buffer, x_offset + i, x_ld, gemm_alpha,
                      b_buffer, b_offset, b_ld);
         gemm2_event.WaitForCompletion();
@@ -206,7 +206,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
         gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
                      m, i, current_block_size, ConstantNegOne<T>(),
                      x_buffer, x_offset + i * x_ld, x_ld,
-                     a_buffer, this_a_offset, a_ld, gemm_alpha,
+                     a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
                      b_buffer, b_offset, b_ld);
         gemm2_event.WaitForCompletion();
       }
@@ -233,7 +233,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
         gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
                      m, n - i - block_size, block_size, ConstantNegOne<T>(),
                      x_buffer, x_offset + i * x_ld, x_ld,
-                     a_buffer, this_a_offset, a_ld, gemm_alpha,
+                     a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
                      b_buffer, b_offset + (i + block_size) * b_ld, b_ld);
         gemm2_event.WaitForCompletion();
       }