summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_part3.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 10:49:55 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 10:49:55 +0100
commit: d9df62b7942bb8af5fd385b8545aceb1d8b578f3 (patch)
tree: 41d8d41bdef5a58c37428c84df15e3e92cff24ef /src/kernels/level3/xgemm_part3.opencl
parent: 540896476d62ce37e7a939d185c15dc930b8a343 (diff)
Fixed defines parsing and substituting in pre-processor; fixed some variable names in kernels
Diffstat (limited to 'src/kernels/level3/xgemm_part3.opencl')
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index 7e46cef5..f12fb304 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -31,9 +31,9 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
) {
// Allocates workitem-private memory (registers)
- #pragma promote_to_registers
+ //#pragma promote_to_registers
realM apm[MWI/VWM];
- #pragma promote_to_registers
+ //#pragma promote_to_registers
realN bpm[NWI/VWN];
// Combined thread identifier (volatile to disable caching)
@@ -128,7 +128,7 @@ void XgemmUpper(const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- #pragma promote_to_registers
+ //#pragma promote_to_registers
realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
@@ -169,7 +169,7 @@ void XgemmLower(const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- #pragma promote_to_registers
+ //#pragma promote_to_registers
realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeN, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);
@@ -214,7 +214,7 @@ void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK,
#endif
// Computes the matrix-multiplication and stores the result in register memory
- #pragma promote_to_registers
+ //#pragma promote_to_registers
realM cpm[NWI*(MWI/VWM)];
#if SA == 1 && SB == 1
XgemmBody(kSizeM, kSizeN, kSizeK, agm, bgm, cgm, cpm, alm, blm);