summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-05-15 22:04:55 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-05-15 22:04:55 +0200
commit 8400ee3a097952a49371973780b47fcbf63e9a5f (patch)
tree c7e853f1775e01c8005b1ea0a33ff8be2c416a91 /src
parent 512b83dbad50cf04ea58ed6edc2def4fa6fc12ad (diff)
Fixed a TRSM issue caused by incorrect block size calculation
Diffstat (limited to 'src')
-rw-r--r-- src/routines/level3/xtrsm.cpp 14
1 file changed, 8 insertions, 6 deletions
diff --git a/src/routines/level3/xtrsm.cpp b/src/routines/level3/xtrsm.cpp
index c1c07d10..685d458b 100644
--- a/src/routines/level3/xtrsm.cpp
+++ b/src/routines/level3/xtrsm.cpp
@@ -145,9 +145,10 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
// True when (upper triangular) or (lower triangular and transposed)
else {
- const auto current_block_size = (m % block_size == 0) ? block_size : (m % block_size);
- const auto i_start = static_cast<int>(m) - static_cast<int>(current_block_size);
+ const auto special_block_size = (m % block_size == 0) ? block_size : (m % block_size);
+ const auto i_start = static_cast<int>(m) - static_cast<int>(special_block_size);
for (auto i = i_start; i >= 0; i -= static_cast<int>(block_size)) {
+ const auto current_block_size = (i == i_start) ? special_block_size : block_size;
const auto gemm_alpha = (i == i_start) ? alpha : ConstantOne<T>();
DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
current_block_size, n, current_block_size, gemm_alpha,
@@ -157,7 +158,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
if (i - static_cast<int>(block_size) < 0) { break; }
const auto this_a_offset = (a_transpose == Transpose::kNo) ? i * a_ld : i;
DoGemm(Layout::kColMajor, a_transpose, Transpose::kNo,
- i, n, block_size, ConstantNegOne<T>(),
+ i, n, current_block_size, ConstantNegOne<T>(),
a_buffer, this_a_offset, a_ld,
x_buffer, x_offset + i, x_ld, gemm_alpha,
b_buffer, b_offset, b_ld);
@@ -170,9 +171,10 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
// True when (lower triangular) or (upper triangular and transposed)
if (condition) {
- const auto current_block_size = (n % block_size == 0) ? block_size : (n % block_size);
- const auto i_start = static_cast<int>(n) - static_cast<int>(current_block_size);
+ const auto special_block_size = (n % block_size == 0) ? block_size : (n % block_size);
+ const auto i_start = static_cast<int>(n) - static_cast<int>(special_block_size);
for (auto i = i_start; i >= 0; i -= static_cast<int>(block_size)) {
+ const auto current_block_size = (i == i_start) ? special_block_size : block_size;
const auto gemm_alpha = (i == i_start) ? alpha : ConstantOne<T>();
DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
m, current_block_size, current_block_size, gemm_alpha,
@@ -182,7 +184,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
if (i - static_cast<int>(block_size) < 0) { break; }
const auto this_a_offset = (a_transpose == Transpose::kNo) ? i : i * a_ld;
DoGemm(Layout::kColMajor, Transpose::kNo, a_transpose,
- m, i, block_size, ConstantNegOne<T>(),
+ m, i, current_block_size, ConstantNegOne<T>(),
x_buffer, x_offset + i * x_ld, x_ld,
a_buffer, this_a_offset, a_ld, gemm_alpha,
b_buffer, b_offset, b_ld);