summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_part3.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-07 22:05:29 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-07 22:05:29 +0100
commit: 540896476d62ce37e7a939d185c15dc930b8a343 (patch)
tree: f9799153ab3fccebc5c3b3a9aa2b1c2db46e47c2 /src/kernels/level3/xgemm_part3.opencl
parent: 0f9637bbac6248a381d7012d7224331d3d394efb (diff)
Added register promotion to the main GEMM kernel
Diffstat (limited to 'src/kernels/level3/xgemm_part3.opencl')
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl 13
1 files changed, 9 insertions, 4 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 4e85c4a8..7e46cef5 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -20,7 +20,7 @@ R"(
// Main body of the matrix-multiplication algorithm. It calls various (inlined) functions.
INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
const __global realM* restrict agm, const __global realN* restrict bgm,
- __global realM* cgm, realM cpm[NWI][MWI/VWM]
+ __global realM* cgm, realM cpm[NWI*MWI/VWM]
#if SA == 1 && SB == 1
, LOCAL_PTR realM* alm, LOCAL_PTR realN* blm
#elif SA == 1
@@ -31,7 +31,9 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
) {
// Allocates workitem-private memory (registers)
+ #pragma promote_to_registers
realM apm[MWI/VWM];
+ #pragma promote_to_registers
realN bpm[NWI/VWN];
// Combined thread identifier (volatile to disable caching)
@@ -126,7 +128,8 @@ void XgemmUpper(const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- realM cpm[NWI][MWI/VWM];
+ #pragma promote_to_registers
+ realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
#elif SA == 1
@@ -166,7 +169,8 @@ void XgemmLower(const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- realM cpm[NWI][MWI/VWM];
+ #pragma promote_to_registers
+ realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
#elif SA == 1
@@ -210,7 +214,8 @@ void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- realM cpm[NWI][MWI/VWM];
+ #pragma promote_to_registers
+ realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeM, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
#elif SA == 1