From fa6e5e67f585b77d34c3031c176de9a0f7904aa9 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Fri, 27 Oct 2017 22:12:30 +0200
Subject: Fixed a bug when using the matrix A-offset argument for the TRSM routine

---
 src/routines/level3/xtrsm.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/routines')

diff --git a/src/routines/level3/xtrsm.cpp b/src/routines/level3/xtrsm.cpp
index 119bf25d..d622e3bf 100644
--- a/src/routines/level3/xtrsm.cpp
+++ b/src/routines/level3/xtrsm.cpp
@@ -143,7 +143,7 @@ void Xtrsm::TrsmColMajor(const Side side, const Triangle triangle,
         auto gemm2 = Xgemm(queue_, gemm2_event.pointer());
         gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
                      m - i - block_size, n, block_size, ConstantNegOne(),
-                     a_buffer, this_a_offset, a_ld,
+                     a_buffer, this_a_offset + a_offset, a_ld,
                      x_buffer, x_offset + i, x_ld, gemm_alpha,
                      b_buffer, b_offset + i + block_size, b_ld);
         gemm2_event.WaitForCompletion();
@@ -172,7 +172,7 @@ void Xtrsm::TrsmColMajor(const Side side, const Triangle triangle,
         auto gemm2 = Xgemm(queue_, gemm2_event.pointer());
         gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
                      i, n, current_block_size, ConstantNegOne(),
-                     a_buffer, this_a_offset, a_ld,
+                     a_buffer, this_a_offset + a_offset, a_ld,
                      x_buffer, x_offset + i, x_ld, gemm_alpha,
                      b_buffer, b_offset, b_ld);
         gemm2_event.WaitForCompletion();
@@ -206,7 +206,7 @@ void Xtrsm::TrsmColMajor(const Side side, const Triangle triangle,
         gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
                      m, i, current_block_size, ConstantNegOne(),
                      x_buffer, x_offset + i * x_ld, x_ld,
-                     a_buffer, this_a_offset, a_ld, gemm_alpha,
+                     a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
                      b_buffer, b_offset, b_ld);
         gemm2_event.WaitForCompletion();
       }
@@ -233,7 +233,7 @@ void Xtrsm::TrsmColMajor(const Side side, const Triangle triangle,
         gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
                      m, n - i - block_size, block_size, ConstantNegOne(),
                      x_buffer, x_offset + i * x_ld, x_ld,
-                     a_buffer, this_a_offset, a_ld, gemm_alpha,
+                     a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
                      b_buffer, b_offset + (i + block_size) * b_ld, b_ld);
         gemm2_event.WaitForCompletion();
       }
-- 
cgit v1.2.3