diff options
Diffstat (limited to 'src/kernels/level3/xgemm_part4.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_part4.opencl | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/src/kernels/level3/xgemm_part4.opencl b/src/kernels/level3/xgemm_part4.opencl
index b1f1ade6..05524337 100644
--- a/src/kernels/level3/xgemm_part4.opencl
+++ b/src/kernels/level3/xgemm_part4.opencl
@@ -19,7 +19,11 @@ R"(
 #if defined(ROUTINE_SYRK) || defined(ROUTINE_HERK) || defined(ROUTINE_SYR2K) || defined(ROUTINE_HER2K)
 
 // Main entry point of the kernel. This is the upper-triangular version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
 void XgemmUpper(const int kSizeN, const int kSizeK,
                 const real_arg arg_alpha,
                 const real_arg arg_beta,
@@ -55,7 +59,11 @@ void XgemmUpper(const int kSizeN, const int kSizeK,
 }
 
 // Main entry point of the kernel. This is the lower-triangular version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
 void XgemmLower(const int kSizeN, const int kSizeK,
                 const real_arg arg_alpha,
                 const real_arg arg_beta,
@@ -95,7 +103,11 @@ void XgemmLower(const int kSizeN, const int kSizeK,
 #else
 
 // Main entry point of the kernel. This is the regular full version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
 void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK,
            const real_arg arg_alpha,
            const real_arg arg_beta,