diff options
Diffstat (limited to 'src/kernels/level3/xgemm_direct_part1.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_direct_part1.opencl | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part1.opencl b/src/kernels/level3/xgemm_direct_part1.opencl index 8b650589..7d185224 100644 --- a/src/kernels/level3/xgemm_direct_part1.opencl +++ b/src/kernels/level3/xgemm_direct_part1.opencl @@ -184,7 +184,7 @@ INLINE_FUNC void GlobalToPrivateCheckedB(const __global real* restrict bgms, rea // Caches on-chip local memory into per-thread private memory (registers). This function is specific // for caching the A input matrix. -INLINE_FUNC void LocalToPrivateDirectA(__local real* alm, real apm[MWID], const int kg, +INLINE_FUNC void LocalToPrivateDirectA(LOCAL_PTR real* alm, real apm[MWID], const int kg, const int a_transpose) { #pragma unroll for (int mi=0; mi<MWID; ++mi) { @@ -195,7 +195,7 @@ INLINE_FUNC void LocalToPrivateDirectA(__local real* alm, real apm[MWID], const } // Same as above, but now for the B input matrix -INLINE_FUNC void LocalToPrivateDirectB(__local real* blm, real bpm[NWID], const int kg, +INLINE_FUNC void LocalToPrivateDirectB(LOCAL_PTR real* blm, real bpm[NWID], const int kg, const int b_transpose) { #pragma unroll for (int ni=0; ni<NWID; ++ni) { |