summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_batched.opencl
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 14:09:13 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 14:09:13 +0100
commit 23e3a85f2c328d4a23db2fca5d1d89d78163711f (patch)
tree 02b8dd5364d958184c45c9bfdb2c28e38d72b24e /src/kernels/level3/xgemm_batched.opencl
parent d9df62b7942bb8af5fd385b8545aceb1d8b578f3 (diff)
Reformatted GEMM kernel to support array-to-register promotion
Diffstat (limited to 'src/kernels/level3/xgemm_batched.opencl')
-rw-r--r-- src/kernels/level3/xgemm_batched.opencl 14
1 files changed, 5 insertions, 9 deletions
diff --git a/src/kernels/level3/xgemm_batched.opencl b/src/kernels/level3/xgemm_batched.opencl
index c7bf10d5..372f910b 100644
--- a/src/kernels/level3/xgemm_batched.opencl
+++ b/src/kernels/level3/xgemm_batched.opencl
@@ -46,20 +46,16 @@ void XgemmBatched(const int kSizeM, const int kSizeN, const int kSizeK,
__local realN blm[KWG * NWG/VWN];
#endif
- // Computes the matrix-multiplication and stores the result in register memory
- realM cpm[NWI][MWI/VWM];
+ // Computes the matrix-multiplication and stores the result in global memory
#if SA == 1 && SB == 1
- XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, cpm, alm, blm);
+ XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, alm, blm);
#elif SA == 1
- XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, cpm, alm);
+ XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, alm);
#elif SB == 1
- XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, cpm, blm);
+ XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta, blm);
#else
- XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, cpm);
+ XgemmBody(kSizeM, kSizeN, kSizeK, agm_, bgm_, cgm_, alpha, beta);
#endif
-
- // Stores an MWG * NWG tile of results and performs the multiplication with alpha and beta
- StoreResults(cgm_, cpm, kSizeM, alpha, beta);
}
// =================================================================================================