Merge tag '1.6.0' into gspr/post-bookworm

author: Gard Spreemann <gspr@nonempty.org> 2023-06-08 11:52:00 +0200
committer: Gard Spreemann <gspr@nonempty.org> 2023-06-08 11:52:00 +0200
commit: 63870a2e60c1bc8bfa7e3672457b551a8e51ffaf (patch)
tree: fe2c0cd5f62e3fbd17e58d3903ec6bb37983f620 /src/kernels/level3/xgemm_direct_batched.opencl
parent: d31fb141cb597aaf405674621aa25f263aa375e1 (diff)
parent: b0b302889cc786907efb080c4e1beea30d2fa39f (diff)
1 file changed, 40 insertions, 8 deletions
diff --git a/src/kernels/level3/xgemm_direct_batched.opencl b/src/kernels/level3/xgemm_direct_batched.opencl
index d15ed31e..102ae762 100644
--- a/src/kernels/level3/xgemm_direct_batched.opencl
+++ b/src/kernels/level3/xgemm_direct_batched.opencl
@@ -20,7 +20,11 @@ R"(
 #if defined(ROUTINE_GEMMBATCHED)
 
 // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, non-transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
                           const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                           const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
@@ -41,7 +45,11 @@ void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
                           const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                           const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
@@ -62,7 +70,11 @@ void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [transposed, non-transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
                           const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                           const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
@@ -83,7 +95,11 @@ void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [transposed, transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
                           const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                           const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
@@ -108,7 +124,11 @@ void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
 #if defined(ROUTINE_GEMMSTRIDEDBATCHED)
 
 // Direct version of the strided-batched GEMM kernel with [A, B] = [non-transposed, non-transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectStridedBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
                                  const real_arg arg_alpha, const real_arg arg_beta,
                                  const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride,
@@ -127,7 +147,11 @@ void XgemmDirectStridedBatchedNN(const int kSizeM, const int kSizeN, const int k
 }
 
 // Direct version of the strided-batched GEMM kernel with [A, B] = [non-transposed, transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectStridedBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
                                  const real_arg arg_alpha, const real_arg arg_beta,
                                  const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride,
@@ -146,7 +170,11 @@ void XgemmDirectStridedBatchedNT(const int kSizeM, const int kSizeN, const int k
 }
 
 // Direct version of the strided-batched GEMM kernel with [A, B] = [transposed, non-transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectStridedBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
                                  const real_arg arg_alpha, const real_arg arg_beta,
                                  const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride,
@@ -165,7 +193,11 @@ void XgemmDirectStridedBatchedTN(const int kSizeM, const int kSizeN, const int k
 }
 
 // Direct version of the strided-batched GEMM kernel with [A, B] = [transposed, transposed]
-__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#if RELAX_WORKGROUP_SIZE == 1
+  __kernel
+#else
+  __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+#endif
 void XgemmDirectStridedBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
                                  const real_arg arg_alpha, const real_arg arg_beta,
                                  const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride,
author	Gard Spreemann <gspr@nonempty.org>	2023-06-08 11:52:00 +0200
committer	Gard Spreemann <gspr@nonempty.org>	2023-06-08 11:52:00 +0200
commit	63870a2e60c1bc8bfa7e3672457b551a8e51ffaf (patch)
tree	fe2c0cd5f62e3fbd17e58d3903ec6bb37983f620 /src/kernels/level3/xgemm_direct_batched.opencl
parent	d31fb141cb597aaf405674621aa25f263aa375e1 (diff)
parent	b0b302889cc786907efb080c4e1beea30d2fa39f (diff)