summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_direct_part3.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-23 20:29:20 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-03-23 20:29:20 +0100
commit: 1cbe2ea301c6b28a7d1101142ff347471f7dc197 (patch)
tree: e4c9b4f8072daebe45e6e1bc5059cf7a798eb9d9 /src/kernels/level3/xgemm_direct_part3.opencl
parent: 52791bf3553bb47a50dea4ac234f7e1b09c4383c (diff)
Removed arrays as function argument from GEMM kernels for Vivante OpenCL compiler
Diffstat (limited to 'src/kernels/level3/xgemm_direct_part3.opencl')
-rw-r--r-- src/kernels/level3/xgemm_direct_part3.opencl 18
1 files changed, 16 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl
index e1532e98..0822c95f 100644
--- a/src/kernels/level3/xgemm_direct_part3.opencl
+++ b/src/kernels/level3/xgemm_direct_part3.opencl
@@ -129,7 +129,14 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize
}
// Stores a tile of results and performs the multiplication with alpha and beta
- StoreResultsDirect(cgm, cpd, idm, idn, alpha, beta, c_ld, c_offset, c_transpose);
+ #pragma unroll
+ for (int _ni = 0; _ni < NWID; _ni += 1) {
+ #pragma unroll
+ for (int _mi = 0; _mi < MWID; _mi += 1) {
+ StoreResultsDirect(cgm, cpd[_ni * MWID + _mi], _mi, _ni, idm, idn,
+ alpha, beta, c_ld, c_offset, c_transpose);
+ }
+ }
}
// Simple but slower version for the parts on the edge (incomplete tiles in M and N-dimensions)
@@ -197,7 +204,14 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize
}
// Stores a tile of results and performs the multiplication with alpha and beta
- StoreResultsChecked(cgm, cpd, idm, idn, kSizeM, kSizeN, alpha, beta, c_ld, c_offset, c_transpose);
+ #pragma unroll
+ for (int _ni = 0; _ni < NWID; _ni += 1) {
+ #pragma unroll
+ for (int _mi = 0; _mi < MWID; _mi += 1) {
+ StoreResultsChecked(cgm, cpd[_ni * MWID + _mi], _mi, _ni, idm, idn, kSizeM, kSizeN,
+ alpha, beta, c_ld, c_offset, c_transpose);
+ }
+ }
}
}