summary | refs | log | tree | commit | diff
path: root/src/routines/level3/xgemm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level3/xgemm.cpp')
-rw-r--r-- src/routines/level3/xgemm.cpp | 34
1 files changed, 17 insertions, 17 deletions
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 4f70dc7a..0015b629 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -126,16 +126,16 @@ void Xgemm<T>::DoGemm(const Layout layout,
// overhead of these extra kernels might not be ideal for certain devices/arguments.
template <typename T>
void Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
- const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
- const bool a_conjugate, const bool b_conjugate,
- const size_t a_one, const size_t a_two, const bool a_want_rotated,
- const size_t b_one, const size_t b_two, const bool b_want_rotated,
- const size_t c_one, const size_t c_two, const bool c_want_rotated) {
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
+ const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+ const bool a_conjugate, const bool b_conjugate,
+ const size_t a_one, const size_t a_two, const bool a_want_rotated,
+ const size_t b_one, const size_t b_two, const bool b_want_rotated,
+ const size_t c_one, const size_t c_two, const bool c_want_rotated) {
// Calculates the ceiled versions of m, n, and k
const auto m_ceiled = Ceil(m, db_["MWG"]);
const auto n_ceiled = Ceil(n, db_["NWG"]);
@@ -247,13 +247,13 @@ void Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k,
// The direct version of GEMM, requiring just one kernel, no pre or post-processing kernels.
template <typename T>
void Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
- const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
- const bool a_conjugate, const bool b_conjugate) {
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
+ const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+ const bool a_conjugate, const bool b_conjugate) {
// Loads the program from the database
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);