summaryrefslogtreecommitdiff
path: root/src/kernels/level3/xgemm_direct_part1.opencl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/level3/xgemm_direct_part1.opencl')
-rw-r--r--src/kernels/level3/xgemm_direct_part1.opencl4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part1.opencl b/src/kernels/level3/xgemm_direct_part1.opencl
index 8b650589..7d185224 100644
--- a/src/kernels/level3/xgemm_direct_part1.opencl
+++ b/src/kernels/level3/xgemm_direct_part1.opencl
@@ -184,7 +184,7 @@ INLINE_FUNC void GlobalToPrivateCheckedB(const __global real* restrict bgms, rea
// Caches on-chip local memory into per-thread private memory (registers). This function is specific
// for caching the A input matrix.
-INLINE_FUNC void LocalToPrivateDirectA(__local real* alm, real apm[MWID], const int kg,
+INLINE_FUNC void LocalToPrivateDirectA(LOCAL_PTR real* alm, real apm[MWID], const int kg,
const int a_transpose) {
#pragma unroll
for (int mi=0; mi<MWID; ++mi) {
@@ -195,7 +195,7 @@ INLINE_FUNC void LocalToPrivateDirectA(__local real* alm, real apm[MWID], const
}
// Same as above, but now for the B input matrix
-INLINE_FUNC void LocalToPrivateDirectB(__local real* blm, real bpm[NWID], const int kg,
+INLINE_FUNC void LocalToPrivateDirectB(LOCAL_PTR real* blm, real bpm[NWID], const int kg,
const int b_transpose) {
#pragma unroll
for (int ni=0; ni<NWID; ++ni) {