summaryrefslogtreecommitdiff
path: root/src/kernels/level3/transpose_pad.opencl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/level3/transpose_pad.opencl')
-rw-r--r--src/kernels/level3/transpose_pad.opencl38
1 files changed, 19 insertions, 19 deletions
diff --git a/src/kernels/level3/transpose_pad.opencl b/src/kernels/level3/transpose_pad.opencl
index fb60ce75..49c5b9a3 100644
--- a/src/kernels/level3/transpose_pad.opencl
+++ b/src/kernels/level3/transpose_pad.opencl
@@ -24,15 +24,15 @@ R"(
// Transposes a matrix from source to destination. The output is padded with zero values in case the
// destination matrix dimensions are larger than the transposed source matrix dimensions.
-inline void _TransposePadMatrix(__local real* tile,
- const int src_one, const int src_two,
- const int src_ld, const int src_offset,
- __global const real* restrict src,
- const int dest_one, const int dest_two,
- const int dest_ld, const int dest_offset,
- __global real* dest,
- const real alpha,
- const int do_conjugate) {
+INLINE_FUNC void _TransposePadMatrix(__local real* tile,
+ const int src_one, const int src_two,
+ const int src_ld, const int src_offset,
+ __global const real* restrict src,
+ const int dest_one, const int dest_two,
+ const int dest_ld, const int dest_offset,
+ __global real* dest,
+ const real alpha,
+ const int do_conjugate) {
// Loop over the work per thread
#pragma unroll
@@ -105,16 +105,16 @@ void TransposePadMatrix(const int src_one, const int src_two,
// Transposes a matrix, while considering possible padding in the source matrix. Data is read from a
// padded source matrix, but only the actual data is written back to the transposed destination
// matrix. This kernel optionally checks for upper/lower triangular matrices.
-inline void _TransposeMatrix(__local real* tile,
- const int src_one, const int src_two,
- const int src_ld, const int src_offset,
- __global const real* restrict src,
- const int dest_one, const int dest_two,
- const int dest_ld, const int dest_offset,
- __global real* dest,
- const real alpha,
- const int upper, const int lower,
- const int diagonal_imag_zero) {
+INLINE_FUNC void _TransposeMatrix(__local real* tile,
+ const int src_one, const int src_two,
+ const int src_ld, const int src_offset,
+ __global const real* restrict src,
+ const int dest_one, const int dest_two,
+ const int dest_ld, const int dest_offset,
+ __global real* dest,
+ const real alpha,
+ const int upper, const int lower,
+ const int diagonal_imag_zero) {
// Loop over the work per thread
#pragma unroll