diff options
author | Gard Spreemann <gspr@nonempty.org> | 2023-06-08 11:52:00 +0200 |
---|---|---|
committer | Gard Spreemann <gspr@nonempty.org> | 2023-06-08 11:52:00 +0200 |
commit | 63870a2e60c1bc8bfa7e3672457b551a8e51ffaf (patch) | |
tree | fe2c0cd5f62e3fbd17e58d3903ec6bb37983f620 /src/kernels/level3/xgemm_direct_batched.opencl | |
parent | d31fb141cb597aaf405674621aa25f263aa375e1 (diff) | |
parent | b0b302889cc786907efb080c4e1beea30d2fa39f (diff) |
Merge tag '1.6.0' into gspr/post-bookworm
Diffstat (limited to 'src/kernels/level3/xgemm_direct_batched.opencl')
-rw-r--r-- | src/kernels/level3/xgemm_direct_batched.opencl | 48 |
1 files changed, 40 insertions, 8 deletions
diff --git a/src/kernels/level3/xgemm_direct_batched.opencl b/src/kernels/level3/xgemm_direct_batched.opencl index d15ed31e..102ae762 100644 --- a/src/kernels/level3/xgemm_direct_batched.opencl +++ b/src/kernels/level3/xgemm_direct_batched.opencl @@ -20,7 +20,11 @@ R"( #if defined(ROUTINE_GEMMBATCHED) // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, non-transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK, const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas, const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld, @@ -41,7 +45,11 @@ void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK, } // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK, const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas, const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld, @@ -62,7 +70,11 @@ void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK, } // Direct version of the batched GEMM kernel with [A, B] = [transposed, non-transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK, const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas, const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld, @@ -83,7 +95,11 @@ void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK, } // Direct version of the batched GEMM kernel with [A, B] = [transposed, transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK, const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas, const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld, @@ -108,7 +124,11 @@ void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK, #if defined(ROUTINE_GEMMSTRIDEDBATCHED) // Direct version of the strided-batched GEMM kernel with [A, B] = [non-transposed, non-transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectStridedBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK, const real_arg arg_alpha, const real_arg arg_beta, const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride, @@ -127,7 +147,11 @@ void XgemmDirectStridedBatchedNN(const int kSizeM, const int kSizeN, const int k } // Direct version of the strided-batched GEMM kernel with [A, B] = [non-transposed, transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectStridedBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK, const real_arg arg_alpha, const real_arg arg_beta, const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride, @@ -146,7 +170,11 @@ void XgemmDirectStridedBatchedNT(const int kSizeM, const int kSizeN, const int k } // Direct version of the strided-batched GEMM kernel with [A, B] = [transposed, non-transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectStridedBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK, const real_arg arg_alpha, const real_arg arg_beta, const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride, @@ -165,7 +193,11 @@ void XgemmDirectStridedBatchedTN(const int kSizeM, const int kSizeN, const int k } // Direct version of the strided-batched GEMM kernel with [A, B] = [transposed, transposed] -__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#if RELAX_WORKGROUP_SIZE == 1 + __kernel +#else + __kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1))) +#endif void XgemmDirectStridedBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK, const real_arg arg_alpha, const real_arg arg_beta, const __global realMD* restrict agm, const int a_offset, const int a_ld, const int a_stride, |