summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/common.hpp 15
-rw-r--r-- src/routines/level3/xtrsm.cpp 3
-rw-r--r-- src/routines/levelx/xinvert.cpp 2
3 files changed, 11 insertions, 9 deletions
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index bdea0086..47d62027 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -38,17 +38,18 @@ template <typename T>
void FillMatrix(Queue &queue, const Device &device,
const Program &program, const Database &,
EventPointer event, const std::vector<Event> &waitForEvents,
- const size_t n, const size_t ld, const size_t offset,
+ const size_t m, const size_t n, const size_t ld, const size_t offset,
const Buffer<T> &dest,
const T constant_value) {
auto kernel = Kernel(program, "FillMatrix");
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, static_cast<int>(ld));
- kernel.SetArgument(2, static_cast<int>(offset));
- kernel.SetArgument(3, dest());
- kernel.SetArgument(4, GetRealArg(constant_value));
+ kernel.SetArgument(0, static_cast<int>(m));
+ kernel.SetArgument(1, static_cast<int>(n));
+ kernel.SetArgument(2, static_cast<int>(ld));
+ kernel.SetArgument(3, static_cast<int>(offset));
+ kernel.SetArgument(4, dest());
+ kernel.SetArgument(5, GetRealArg(constant_value));
auto local = std::vector<size_t>{8, 8};
- auto global = std::vector<size_t>{Ceil(ld, 8), Ceil(n, 8)};
+ auto global = std::vector<size_t>{Ceil(m, 8), Ceil(n, 8)};
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}
diff --git a/src/routines/level3/xtrsm.cpp b/src/routines/level3/xtrsm.cpp
index 42855362..b734dd2d 100644
--- a/src/routines/level3/xtrsm.cpp
+++ b/src/routines/level3/xtrsm.cpp
@@ -91,6 +91,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
// Creates a copy of B to avoid overwriting input in GEMM while computing output
const auto b_size = b_ld * (n - 1) + m + b_offset;
const auto x_one = m;
+ const auto x_two = n;
const auto x_size = b_size;
const auto x_ld = b_ld;
const auto x_offset = b_offset;
@@ -105,7 +106,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
auto eventWaitList = std::vector<Event>();
auto fill_matrix_event = Event();
FillMatrix(queue_, device_, program_, db_, fill_matrix_event.pointer(), eventWaitList,
- x_one, x_ld, x_offset, x_buffer, ConstantZero<T>());
+ x_one, x_two, x_ld, x_offset, x_buffer, ConstantZero<T>());
fill_matrix_event.WaitForCompletion();
// Inverts the diagonal blocks
diff --git a/src/routines/levelx/xinvert.cpp b/src/routines/levelx/xinvert.cpp
index bcc3706d..5c21d5ce 100644
--- a/src/routines/levelx/xinvert.cpp
+++ b/src/routines/levelx/xinvert.cpp
@@ -73,7 +73,7 @@ void Xinvert<T>::InvertMatrixDiagonalBlocks(const Layout layout, const Triangle
auto event_wait_list = std::vector<Event>();
auto fill_matrix_event = Event();
FillMatrix(queue_, device_, program_, db_, fill_matrix_event.pointer(), event_wait_list,
- num_blocks * block_size, block_size, 0, dest, ConstantZero<T>());
+ block_size, num_blocks * block_size, block_size, 0, dest, ConstantZero<T>());
event_wait_list.push_back(fill_matrix_event);
// Inverts the diagonal IB by IB inner blocks of the matrix: one block per work-group