summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/xgemm_part4.opencl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/level3/xgemm_part4.opencl')
-rw-r--r-- src/kernels/level3/xgemm_part4.opencl 18
1 files changed, 15 insertions, 3 deletions
diff --git a/src/kernels/level3/xgemm_part4.opencl b/src/kernels/level3/xgemm_part4.opencl
index b1f1ade6..05524337 100644
--- a/src/kernels/level3/xgemm_part4.opencl
+++ b/src/kernels/level3/xgemm_part4.opencl
@@ -19,7 +19,11 @@ R"(
#if defined(ROUTINE_SYRK) || defined(ROUTINE_HERK) || defined(ROUTINE_SYR2K) || defined(ROUTINE_HER2K)
// Main entry point of the kernel. This is the upper-triangular version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+ __kernel
+#else
+ __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
void XgemmUpper(const int kSizeN, const int kSizeK,
const real_arg arg_alpha,
const real_arg arg_beta,
@@ -55,7 +59,11 @@ void XgemmUpper(const int kSizeN, const int kSizeK,
}
// Main entry point of the kernel. This is the lower-triangular version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+ __kernel
+#else
+ __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
void XgemmLower(const int kSizeN, const int kSizeK,
const real_arg arg_alpha,
const real_arg arg_beta,
@@ -95,7 +103,11 @@ void XgemmLower(const int kSizeN, const int kSizeK,
#else
// Main entry point of the kernel. This is the regular full version.
-__kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+ __kernel
+#else
+ __kernel __attribute__((reqd_work_group_size(MDIMC, NDIMC, 1)))
+#endif
void Xgemm(const int kSizeM, const int kSizeN, const int kSizeK,
const real_arg arg_alpha,
const real_arg arg_beta,