diff options
-rw-r--r-- src/kernels/level3/xgemm_part1.opencl | 2 +-
-rw-r--r-- src/kernels/level3/xgemm_part3.opencl | 2 +-
2 files changed, 2 insertions, 2 deletions
diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index f8dcee73..d15dafc8 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -57,7 +57,7 @@ R"(
   #define NWG 8 // Tile-size in dimension N (e.g. 64, 128)
 #endif
 #ifndef KWG
-  #define KWG 1 // Tile-size in dimension K (e.g. 8, 16)
+  #define KWG 8 // Tile-size in dimension K (e.g. 8, 16)
 #endif
 #ifndef MDIMC
   #define MDIMC 8 // Threads per workgroup in M-dimension (e.g. 8, 16, 32)
diff --git a/src/kernels/level3/xgemm_part3.opencl b/src/kernels/level3/xgemm_part3.opencl
index d7ddeb15..c25c3001 100644
--- a/src/kernels/level3/xgemm_part3.opencl
+++ b/src/kernels/level3/xgemm_part3.opencl
@@ -84,7 +84,7 @@ INLINE_FUNC void XgemmBody(const int kSizeM, const int kSizeN, const int kSizeK,
   // Loops over all workitem tiles, unrolled by a factor KWI
   for (int pwi = 0; pwi < KWG * KREG; pwi += KWI * KREG) {
     #pragma unroll
-    for (int _pit = 0; _pit < KWI * KREG; _pit += KREG) {
+    for (int _pit = 0; _pit < KWI*KREG; _pit += KREG) {
       #if SA == 0 || SB == 0
         int idk = kwg + pwi + _pit;
       #endif