From 9fc38cdf5ed44ef41cf3d6cf9e7c32585447c042 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Tue, 23 Jun 2015 17:58:51 +0200 Subject: Added a lower/upper triangular version of the GEMM kernel --- src/kernels/xgemm.opencl | 371 ++++++++++++++++++++++++++++++----------------- 1 file changed, 240 insertions(+), 131 deletions(-) (limited to 'src/kernels') diff --git a/src/kernels/xgemm.opencl b/src/kernels/xgemm.opencl index a4f45e90..4c7ae064 100644 --- a/src/kernels/xgemm.opencl +++ b/src/kernels/xgemm.opencl @@ -127,6 +127,55 @@ R"( // ================================================================================================= +// Initializes the accumulation registers to zero +inline void InitAccRegisters(realM cpm[NWI][MWI/VWM]) { + #pragma unroll + for (int mi=0; mi get_group_id(0)*MWG) { + return; + } + + // Allocates workgroup-private memory (local memory) + #if SA == 1 + __local realM alm[KWG * MWG/VWM]; + #endif + #if SB == 1 + __local realN blm[KWG * NWG/VWN]; + #endif + + // Computes the matrix-multiplication and stores the result in register memory + realM cpm[NWI][MWI/VWM]; + #if SA == 1 && SB == 1 + XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm); + #elif SA == 1 + XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm); + #elif SB == 1 + XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, blm); + #else + XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm); + #endif + + // Stores an MWG * NWG tile of results and performs the multiplication with alpha and beta + StoreResults(cgm, cpm, kSizeN, alpha, beta); +} + +// ================================================================================================= + // End of the C++11 raw string literal )"; -- cgit v1.2.3