From 2a29dc061ce272ed59a4366e8326af221f0657bd Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Fri, 6 Apr 2018 21:06:13 +0200
Subject: Fixed a compilation issue for complex datatypes and vload

---
 src/kernels/level3/xgemm_part1.opencl | 50 +++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 20 deletions(-)

(limited to 'src/kernels/level3/xgemm_part1.opencl')

diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index 265bb019..cbc43d51 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -299,16 +299,21 @@ INLINE_FUNC realN GlobalToPrivateB(const __global realN* restrict bgm, const int
 INLINE_FUNC realN GlobalToPrivateA2D(const __global real* restrict a_ptr, const int tid_y, const int _ni,
                                      const int kSizeK, const int idk, const int _ki) {
   const int a_index = (tid_y * NWI + _ni) * kSizeK + idk + _ki * VWN;
-  #if VWN == 1
-    return a_ptr[a_index];
-  #elif VWN == 2
-    return vload2(0, a_ptr + a_index);
-  #elif VWN == 4
-    return vload4(0, a_ptr + a_index);
-  #elif VWN == 8
-    return vload8(0, a_ptr + a_index);
-  #elif VWN == 16
-    return vload16(0, a_ptr + a_index);
+  #if PRECISION == 3232 || PRECISION == 6464
+    const __global realN* restrict agm = (const __global realN* restrict) a_ptr;
+    return agm[a_index];
+  #else
+    #if VWN == 1
+      return a_ptr[a_index];
+    #elif VWN == 2
+      return vload2(0, a_ptr + a_index);
+    #elif VWN == 4
+      return vload4(0, a_ptr + a_index);
+    #elif VWN == 8
+      return vload8(0, a_ptr + a_index);
+    #elif VWN == 16
+      return vload16(0, a_ptr + a_index);
+    #endif
   #endif
 }
 
@@ -316,16 +321,21 @@ INLINE_FUNC realN GlobalToPrivateA2D(const __global real* restrict a_ptr, const
 INLINE_FUNC realM GlobalToPrivateB2D(const __global real* restrict b_ptr, const int tid_x, const int _mi,
                                      const int kSizeN, const int idk, const int _ki) {
   const int b_index = (idk + _ki) * kSizeN + tid_x * MWI + _mi * VWM;
-  #if VWM == 1
-    return b_ptr[b_index];
-  #elif VWM == 2
-    return vload2(0, b_ptr + b_index);
-  #elif VWM == 4
-    return vload4(0, b_ptr + b_index);
-  #elif VWM == 8
-    return vload8(0, b_ptr + b_index);
-  #elif VWM == 16
-    return vload16(0, b_ptr + b_index);
+  #if PRECISION == 3232 || PRECISION == 6464
+    const __global realM* restrict bgm = (const __global realM* restrict) b_ptr;
+    return bgm[b_index];
+  #else
+    #if VWM == 1
+      return b_ptr[b_index];
+    #elif VWM == 2
+      return vload2(0, b_ptr + b_index);
+    #elif VWM == 4
+      return vload4(0, b_ptr + b_index);
+    #elif VWM == 8
+      return vload8(0, b_ptr + b_index);
+    #elif VWM == 16
+      return vload16(0, b_ptr + b_index);
+    #endif
   #endif
 }
 
-- 
cgit v1.2.3