From 52ccaf5b25e14c9ce032315e5e96b1f27886d481 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Thu, 16 Jun 2016 18:07:46 +0200
Subject: Added XOMATCOPY routines to perform out-of-place matrix scaling, copying, and/or transposing

---
 src/kernels/level3/copy_fast.opencl | 44 +++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

(limited to 'src/kernels/level3/copy_fast.opencl')

diff --git a/src/kernels/level3/copy_fast.opencl b/src/kernels/level3/copy_fast.opencl
index bfbfacd4..09e54e6d 100644
--- a/src/kernels/level3/copy_fast.opencl
+++ b/src/kernels/level3/copy_fast.opencl
@@ -38,13 +38,53 @@ R"(
 __attribute__((reqd_work_group_size(COPY_DIMX, COPY_DIMY, 1)))
 __kernel void CopyMatrixFast(const int ld,
 __global const realC* restrict src,
-__global realC* dest) {
+__global realC* dest,
+const __constant real* restrict arg_alpha) {
+const real alpha = arg_alpha[0];
 #pragma unroll
 for (int w_one=0; w_one