diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-07 22:05:29 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-07 22:05:29 +0100 |
commit | 540896476d62ce37e7a939d185c15dc930b8a343 (patch) | |
tree | f9799153ab3fccebc5c3b3a9aa2b1c2db46e47c2 /src/kernels/level3/xgemm_part3.opencl | |
parent | 0f9637bbac6248a381d7012d7224331d3d394efb (diff) |
Added register promotion to the main GEMM kernel
Diffstat (limited to 'src/kernels/level3/xgemm_part3.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_part3.opencl | 13 |
1 files changed, 9 insertions, 4 deletions
```diff
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 4e85c4a8..7e46cef5 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -20,7 +20,7 @@ R"(
 // Main body of the matrix-multiplication algorithm. It calls various (inlined) functions.
 INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
                            const __global realM* restrict agm, const __global realN* restrict bgm,
-                           __global realM* cgm, realM cpm[NWI][MWI/VWM]
+                           __global realM* cgm, realM cpm[NWI*MWI/VWM]
                            #if SA == 1 && SB == 1
                              , LOCAL_PTR realM* alm, LOCAL_PTR realN* blm
                            #elif SA == 1
@@ -31,7 +31,9 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
                            ) {
 
   // Allocates workitem-private memory (registers)
+  #pragma promote_to_registers
   realM apm[MWI/VWM];
+  #pragma promote_to_registers
   realN bpm[NWI/VWN];
 
   // Combined thread identifier (volatile to disable caching)
@@ -126,7 +128,8 @@ void XgemmUpper(const int kSizeN, const int kSizeK,
   #endif
 
   // Computes the matrix-multiplication and stores the result in register memory
-  realM cpm[NWI][MWI/VWM];
+  #pragma promote_to_registers
+  realM cpm[NWI*(MWI/VWM)];
   #if SA == 1 && SB == 1
     XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
   #elif SA == 1
@@ -166,7 +169,8 @@ void XgemmLower(const int kSizeN, const int kSizeK,
   #endif
 
   // Computes the matrix-multiplication and stores the result in register memory
-  realM cpm[NWI][MWI/VWM];
+  #pragma promote_to_registers
+  realM cpm[NWI*(MWI/VWM)];
   #if SA == 1 && SB == 1
     XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
   #elif SA == 1
@@ -210,7 +214,8 @@ void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK,
   #endif
 
   // Computes the matrix-multiplication and stores the result in register memory
-  realM cpm[NWI][MWI/VWM];
+  #pragma promote_to_registers
+  realM cpm[NWI*(MWI/VWM)];
   #if SA == 1 && SB == 1
     XgemmBody(kSizeM, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
   #elif SA == 1
```