diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-23 20:29:20 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-23 20:29:20 +0100 |
commit | 1cbe2ea301c6b28a7d1101142ff347471f7dc197 (patch) | |
tree | e4c9b4f8072daebe45e6e1bc5059cf7a798eb9d9 /src/kernels/level3/xgemm_direct_part3.opencl | |
parent | 52791bf3553bb47a50dea4ac234f7e1b09c4383c (diff) |
Removed arrays as function arguments from GEMM kernels for Vivante OpenCL compiler
Diffstat (limited to 'src/kernels/level3/xgemm_direct_part3.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_direct_part3.opencl | 18 |
1 file changed, 16 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl index e1532e98..0822c95f 100644 --- a/src/kernels/level3/xgemm_direct_part3.opencl +++ b/src/kernels/level3/xgemm_direct_part3.opencl @@ -129,7 +129,14 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize } // Stores a tile of results and performs the multiplication with alpha and beta - StoreResultsDirect(cgm, cpd, idm, idn, alpha, beta, c_ld, c_offset, c_transpose); + #pragma unroll + for (int _ni = 0; _ni < NWID; _ni += 1) { + #pragma unroll + for (int _mi = 0; _mi < MWID; _mi += 1) { + StoreResultsDirect(cgm, cpd[_ni * MWID + _mi], _mi, _ni, idm, idn, + alpha, beta, c_ld, c_offset, c_transpose); + } + } } // Simple but slower version for the parts on the edge (incomplete tiles in M and N-dimensions) @@ -197,7 +204,14 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize } // Stores a tile of results and performs the multiplication with alpha and beta - StoreResultsChecked(cgm, cpd, idm, idn, kSizeM, kSizeN, alpha, beta, c_ld, c_offset, c_transpose); + #pragma unroll + for (int _ni = 0; _ni < NWID; _ni += 1) { + #pragma unroll + for (int _mi = 0; _mi < MWID; _mi += 1) { + StoreResultsChecked(cgm, cpd[_ni * MWID + _mi], _mi, _ni, idm, idn, kSizeM, kSizeN, + alpha, beta, c_ld, c_offset, c_transpose); + } + } } } |