summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_part3.opencl
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-07-08 17:12:16 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-07-08 17:12:16 +0200
commit 442c31dd508c573023594a803160ddb69d4929f2 (patch)
tree 55474d09086481117204626b27cbec4ee465be9a /src/kernels/level3/xgemm_part3.opencl
parent 75c0e861b842dbd08def5e55696fd79d713afc96 (diff)
Made the inline keyword in kernels optional; currently only enabled for NVIDIA and ARM GPUs
Diffstat (limited to 'src/kernels/level3/xgemm_part3.opencl')
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl 22
1 files changed, 11 insertions, 11 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 8ac3a3a8..3f0d590d 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -18,17 +18,17 @@ R"(
// =================================================================================================
// Main body of the matrix-multiplication algorithm. It calls the (inlined) functions above.
-inline void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
- const __global realM* restrict agm, const __global realN* restrict bgm,
- __global realM* cgm, realM cpm[NWI][MWI/VWM]
- #if SA == 1 && SB == 1
- , __local realM* alm, __local realN* blm
- #elif SA == 1
- , __local realM* alm
- #elif SB == 1
- , __local realN* blm
- #endif
- ) {
+INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
+ const __global realM* restrict agm, const __global realN* restrict bgm,
+ __global realM* cgm, realM cpm[NWI][MWI/VWM]
+ #if SA == 1 && SB == 1
+ , __local realM* alm, __local realN* blm
+ #elif SA == 1
+ , __local realM* alm
+ #elif SB == 1
+ , __local realN* blm
+ #endif
+ ) {
// Allocates workitem-private memory (registers)
realM apm[MWI/VWM];