diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-14 17:21:34 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-14 17:21:34 +0200 |
commit | 55a802c63d79264bf6e5e9d82a1df34bbe85ee64 (patch) | |
tree | b5bd9ac4fca227055c47c76b801b15dbeaa303c7 /src/kernels | |
parent | b06bc01da90983ce484fded4e1a87f5fcd5c4eca (diff) |
Fixed a kernel/attribute order bug in the direct GEMM kernels
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level3/xgemm_direct_batched.opencl | 16 | ||||
-rw-r--r-- | src/kernels/level3/xgemm_direct_part3.opencl | 16 |
2 files changed, 16 insertions, 16 deletions
```diff
diff --git a/src/kernels/level3/xgemm_direct_batched.opencl b/src/kernels/level3/xgemm_direct_batched.opencl
index fa582cff..d946a056 100644
--- a/src/kernels/level3/xgemm_direct_batched.opencl
+++ b/src/kernels/level3/xgemm_direct_batched.opencl
@@ -19,8 +19,8 @@ R"(
 // =================================================================================================
 
 // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, non-transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
                                    const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                                    const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
                                    const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
@@ -40,8 +40,8 @@ __kernel void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [non-transposed, transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
                                    const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                                    const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
                                    const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
@@ -61,8 +61,8 @@ __kernel void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [transposed, non-transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
                                    const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                                    const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
                                    const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
@@ -82,8 +82,8 @@ __kernel void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int
 }
 
 // Direct version of the batched GEMM kernel with [A, B] = [transposed, transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
                                    const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
                                    const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
                                    const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
diff --git a/src/kernels/level3/xgemm_direct_part3.opencl b/src/kernels/level3/xgemm_direct_part3.opencl
index dcdeb1b6..5862dfa3 100644
--- a/src/kernels/level3/xgemm_direct_part3.opencl
+++ b/src/kernels/level3/xgemm_direct_part3.opencl
@@ -147,8 +147,8 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize
 // =================================================================================================
 
 // Direct version of the GEMM kernel with [A, B] = [non-transposed, non-transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK,
                             const real_arg arg_alpha, const real_arg arg_beta,
                             const __global realMD* restrict agm, const int a_offset, const int a_ld,
                             const __global realND* restrict bgm, const int b_offset, const int b_ld,
@@ -162,8 +162,8 @@ __kernel void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK
 }
 
 // Direct version of the GEMM kernel with [A, B] = [non-transposed, transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK,
                             const real_arg arg_alpha, const real_arg arg_beta,
                             const __global realMD* restrict agm, const int a_offset, const int a_ld,
                             const __global realND* restrict bgm, const int b_offset, const int b_ld,
@@ -177,8 +177,8 @@ __kernel void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK
 }
 
 // Direct version of the GEMM kernel with [A, B] = [transposed, non-transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK,
                             const real_arg arg_alpha, const real_arg arg_beta,
                             const __global realMD* restrict agm, const int a_offset, const int a_ld,
                             const __global realND* restrict bgm, const int b_offset, const int b_ld,
@@ -192,8 +192,8 @@ __kernel void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK
 }
 
 // Direct version of the GEMM kernel with [A, B] = [transposed, transposed]
-__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
-__kernel void XgemmDirectTT(const int kSizeM, const int kSizeN, const int kSizeK,
+__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
+void XgemmDirectTT(const int kSizeM, const int kSizeN, const int kSizeK,
                             const real_arg arg_alpha, const real_arg arg_beta,
                             const __global realMD* restrict agm, const int a_offset, const int a_ld,
                             const __global realND* restrict bgm, const int b_offset, const int b_ld,
```