summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r--  CHANGELOG  1
-rw-r--r--  src/routines/level3/xtrsm.cpp  8
2 files changed, 5 insertions, 4 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 4d1bb764..14a6dd22 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
Development (next version)
- Fixed a bug in the TRSM/TRSV routines due to missing synchronisations after GEMM/GEMV calls
+- Fixed a bug in TRSM when using the a-offset argument
- Added a CUDA API to CLBlast:
* The library and kernels can be compiled with the CUDA driver API and NVRTC (requires CUDA 7.5)
* Two CUDA API sample programs are added: SGEMM and DAXPY
diff --git a/src/routines/level3/xtrsm.cpp b/src/routines/level3/xtrsm.cpp
index 119bf25d..d622e3bf 100644
--- a/src/routines/level3/xtrsm.cpp
+++ b/src/routines/level3/xtrsm.cpp
@@ -143,7 +143,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
auto gemm2 = Xgemm<T>(queue_, gemm2_event.pointer());
gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
m - i - block_size, n, block_size, ConstantNegOne<T>(),
- a_buffer, this_a_offset, a_ld,
+ a_buffer, this_a_offset + a_offset, a_ld,
x_buffer, x_offset + i, x_ld, gemm_alpha,
b_buffer, b_offset + i + block_size, b_ld);
gemm2_event.WaitForCompletion();
@@ -172,7 +172,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
auto gemm2 = Xgemm<T>(queue_, gemm2_event.pointer());
gemm2.DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
i, n, current_block_size, ConstantNegOne<T>(),
- a_buffer, this_a_offset, a_ld,
+ a_buffer, this_a_offset + a_offset, a_ld,
x_buffer, x_offset + i, x_ld, gemm_alpha,
b_buffer, b_offset, b_ld);
gemm2_event.WaitForCompletion();
@@ -206,7 +206,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
m, i, current_block_size, ConstantNegOne<T>(),
x_buffer, x_offset + i * x_ld, x_ld,
- a_buffer, this_a_offset, a_ld, gemm_alpha,
+ a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
b_buffer, b_offset, b_ld);
gemm2_event.WaitForCompletion();
}
@@ -233,7 +233,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
gemm2.DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
m, n - i - block_size, block_size, ConstantNegOne<T>(),
x_buffer, x_offset + i * x_ld, x_ld,
- a_buffer, this_a_offset, a_ld, gemm_alpha,
+ a_buffer, this_a_offset + a_offset, a_ld, gemm_alpha,
b_buffer, b_offset + (i + block_size) * b_ld, b_ld);
gemm2_event.WaitForCompletion();
}