diff options
Diffstat (limited to 'src/routine.cc')
-rw-r--r-- | src/routine.cc | 9 |
1 files changed, 6 insertions, 3 deletions
diff --git a/src/routine.cc b/src/routine.cc
index 32face4a..064db754 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -209,11 +209,11 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
                                            const size_t dest_one, const size_t dest_two,
                                            const size_t dest_ld, const size_t dest_offset,
                                            const Buffer &dest,
-                                           const bool do_transpose, const bool pad,
-                                           const Program &program) {
+                                           const bool do_transpose, const bool do_conjugate,
+                                           const bool pad, const Program &program) {
 
   // Determines whether or not the fast-version could potentially be used
-  auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) &&
+  auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) &&
                          (src_one == dest_one) && (src_two == dest_two) && (src_ld == dest_ld);
 
   // Determines the right kernel
@@ -264,6 +264,9 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
       kernel.SetArgument(7, static_cast<int>(dest_ld));
       kernel.SetArgument(8, static_cast<int>(dest_offset));
       kernel.SetArgument(9, dest());
+      if (pad) {
+        kernel.SetArgument(10, static_cast<int>(do_conjugate));
+      }
     }
 
     // Launches the kernel and returns the error code. Uses global and local thread sizes based on