summary | refs | log | tree | commit | diff
path: root/src/routine.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/routine.cc')
-rw-r--r-- src/routine.cc 9
1 files changed, 6 insertions, 3 deletions
diff --git a/src/routine.cc b/src/routine.cc
index 32face4a..064db754 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -209,11 +209,11 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
const size_t dest_one, const size_t dest_two,
const size_t dest_ld, const size_t dest_offset,
const Buffer &dest,
- const bool do_transpose, const bool pad,
- const Program &program) {
+ const bool do_transpose, const bool do_conjugate,
+ const bool pad, const Program &program) {
// Determines whether or not the fast-version could potentially be used
- auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) &&
+ auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) &&
(src_one == dest_one) && (src_two == dest_two) && (src_ld == dest_ld);
// Determines the right kernel
@@ -264,6 +264,9 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
kernel.SetArgument(7, static_cast<int>(dest_ld));
kernel.SetArgument(8, static_cast<int>(dest_offset));
kernel.SetArgument(9, dest());
+ if (pad) {
+ kernel.SetArgument(10, static_cast<int>(do_conjugate));
+ }
}
// Launches the kernel and returns the error code. Uses global and local thread sizes based on