diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-07-08 17:12:16 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-07-08 17:12:16 +0200 |
commit | 442c31dd508c573023594a803160ddb69d4929f2 (patch) | |
tree | 55474d09086481117204626b27cbec4ee465be9a /src/kernels/level3/xgemm_part3.opencl | |
parent | 75c0e861b842dbd08def5e55696fd79d713afc96 (diff) |
Made the inline keyword in kernels optional; currently only enabled for NVIDIA and ARM GPUs.
Diffstat (limited to 'src/kernels/level3/xgemm_part3.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_part3.opencl | 22 |
1 files changed, 11 insertions, 11 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl index 8ac3a3a8..3f0d590d 100644 --- a/src/kernels/level3/xgemm_part3.opencl +++ b/src/kernels/level3/xgemm_part3.opencl @@ -18,17 +18,17 @@ R"( // ================================================================================================= // Main body of the matrix-multiplication algorithm. It calls the (inlined) functions above. -inline void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, - const __global realM* restrict agm, const __global realN* restrict bgm, - __global realM* cgm, realM cpm[NWI][MWI/VWM] - #if SA == 1 && SB == 1 - , __local realM* alm, __local realN* blm - #elif SA == 1 - , __local realM* alm - #elif SB == 1 - , __local realN* blm - #endif - ) { +INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK, + const __global realM* restrict agm, const __global realN* restrict bgm, + __global realM* cgm, realM cpm[NWI][MWI/VWM] + #if SA == 1 && SB == 1 + , __local realM* alm, __local realN* blm + #elif SA == 1 + , __local realM* alm + #elif SB == 1 + , __local realN* blm + #endif + ) { // Allocates workitem-private memory (registers) realM apm[MWI/VWM]; |