From 8d5747aa54b88812ef4060328e3befdb13f3f45a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 5 Oct 2016 08:23:54 +0200 Subject: Made non-standard types void-pointers in the Netlib BLAS interface --- include/clblast_blas.h | 538 ++++++++++++++++---------------- scripts/generator/generator/datatype.py | 7 +- scripts/generator/generator/routine.py | 6 +- 3 files changed, 275 insertions(+), 276 deletions(-) diff --git a/include/clblast_blas.h b/include/clblast_blas.h index 41b03446..a5d0cc9c 100644 --- a/include/clblast_blas.h +++ b/include/clblast_blas.h @@ -98,14 +98,14 @@ void PUBLIC_API cblas_dswap(const int n, double* x, const int x_inc, double* y, const int y_inc); void PUBLIC_API cblas_cswap(const int n, - float2* x, const int x_inc, - float2* y, const int y_inc); + void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_zswap(const int n, - double2* x, const int x_inc, - double2* y, const int y_inc); + void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_hswap(const int n, - half* x, const int x_inc, - half* y, const int y_inc); + void* x, const int x_inc, + void* y, const int y_inc); // Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL void PUBLIC_API cblas_sscal(const int n, @@ -116,13 +116,13 @@ void PUBLIC_API cblas_dscal(const int n, double* x, const int x_inc); void PUBLIC_API cblas_cscal(const int n, const void* alpha, - float2* x, const int x_inc); + void* x, const int x_inc); void PUBLIC_API cblas_zscal(const int n, const void* alpha, - double2* x, const int x_inc); + void* x, const int x_inc); void PUBLIC_API cblas_hscal(const int n, const void* alpha, - half* x, const int x_inc); + void* x, const int x_inc); // Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY void PUBLIC_API cblas_scopy(const int n, @@ -132,14 +132,14 @@ void PUBLIC_API cblas_dcopy(const int n, const double* x, const int x_inc, double* y, const int y_inc); void PUBLIC_API cblas_ccopy(const int n, - const float2* x, const int x_inc, - float2* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_zcopy(const int n, - const double2* x, const int x_inc, - double2* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_hcopy(const int n, - const half* x, const int x_inc, - half* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY void PUBLIC_API cblas_saxpy(const int n, @@ -152,16 +152,16 @@ void PUBLIC_API cblas_daxpy(const int n, double* y, const int y_inc); void PUBLIC_API cblas_caxpy(const int n, const void* alpha, - const float2* x, const int x_inc, - float2* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_zaxpy(const int n, const void* alpha, - const double2* x, const int x_inc, - double2* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); void PUBLIC_API cblas_haxpy(const int n, const void* alpha, - const half* x, const int x_inc, - half* y, const int y_inc); + const void* x, const int x_inc, + void* y, const int y_inc); // Dot product of two vectors: SDOT/DDOT/HDOT void PUBLIC_API cblas_sdot(const int n, @@ -173,29 +173,29 @@ void PUBLIC_API cblas_ddot(const int n, const double* x, const int x_inc, const double* y, const int y_inc); void PUBLIC_API cblas_hdot(const int n, - half* dot, - const half* x, const int x_inc, - const half* y, const int y_inc); + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc); // Dot product of two complex vectors: CDOTU/ZDOTU void PUBLIC_API cblas_cdotu(const int n, - float2* dot, - const float2* x, const int x_inc, - const float2* y, const int y_inc); + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc); void PUBLIC_API cblas_zdotu(const int n, - double2* dot, - const double2* x, const int x_inc, - const double2* y, const int y_inc); + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc); // Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC void PUBLIC_API cblas_cdotc(const int n, - float2* dot, - const float2* x, const int x_inc, - const float2* y, const int y_inc); + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc); void PUBLIC_API cblas_zdotc(const int n, - double2* dot, - const double2* x, const int x_inc, - const double2* y, const int y_inc); + void* dot, + const void* x, const int x_inc, + const void* y, const int y_inc); // Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 void PUBLIC_API cblas_snrm2(const int n, @@ -205,14 +205,14 @@ void PUBLIC_API cblas_dnrm2(const int n, double* nrm2, const double* x, const int x_inc); void PUBLIC_API cblas_scnrm2(const int n, - float2* nrm2, - const float2* x, const int x_inc); + void* nrm2, + const void* x, const int x_inc); void PUBLIC_API cblas_dznrm2(const int n, - double2* nrm2, - const double2* x, const int x_inc); + void* nrm2, + const void* x, const int x_inc); void PUBLIC_API cblas_hnrm2(const int n, - half* nrm2, - const half* x, const int x_inc); + void* nrm2, + const void* x, const int x_inc); // Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM void PUBLIC_API cblas_sasum(const int n, @@ -222,14 +222,14 @@ void PUBLIC_API cblas_dasum(const int n, double* asum, const double* x, const int x_inc); void PUBLIC_API cblas_scasum(const int n, - float2* asum, - const float2* x, const int x_inc); + void* asum, + const void* x, const int x_inc); void PUBLIC_API cblas_dzasum(const int n, - double2* asum, - const double2* x, const int x_inc); + void* asum, + const void* x, const int x_inc); void PUBLIC_API cblas_hasum(const int n, - half* asum, - const half* x, const int x_inc); + void* asum, + const void* x, const int x_inc); // Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM void PUBLIC_API cblas_ssum(const int n, @@ -239,14 +239,14 @@ void PUBLIC_API cblas_dsum(const int n, double* sum, const double* x, const int x_inc); void PUBLIC_API cblas_scsum(const int n, - float2* sum, - const float2* x, const int x_inc); + void* sum, + const void* x, const int x_inc); void PUBLIC_API cblas_dzsum(const int n, - double2* sum, - const double2* x, const int x_inc); + void* sum, + const void* x, const int x_inc); void PUBLIC_API cblas_hsum(const int n, - half* sum, - const half* x, const int x_inc); + void* sum, + const void* x, const int x_inc); // Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX void PUBLIC_API cblas_isamax(const int n, @@ -256,14 +256,14 @@ void PUBLIC_API cblas_idamax(const int n, double* imax, const double* x, const int x_inc); void PUBLIC_API cblas_icamax(const int n, - float2* imax, - const float2* x, const int x_inc); + void* imax, + const void* x, const int x_inc); void PUBLIC_API cblas_izamax(const int n, - double2* imax, - const double2* x, const int x_inc); + void* imax, + const void* x, const int x_inc); void PUBLIC_API cblas_ihamax(const int n, - half* imax, - const half* x, const int x_inc); + void* imax, + const void* x, const int x_inc); // Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX void PUBLIC_API cblas_ismax(const int n, @@ -273,14 +273,14 @@ void PUBLIC_API cblas_idmax(const int n, double* imax, const double* x, const int x_inc); void PUBLIC_API cblas_icmax(const int n, - float2* imax, - const float2* x, const int x_inc); + void* imax, + const void* x, const int x_inc); void PUBLIC_API cblas_izmax(const int n, - double2* imax, - const double2* x, const int x_inc); + void* imax, + const void* x, const int x_inc); void PUBLIC_API cblas_ihmax(const int n, - half* imax, - const half* x, const int x_inc); + void* imax, + const void* x, const int x_inc); // Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN void PUBLIC_API cblas_ismin(const int n, @@ -290,14 +290,14 @@ void PUBLIC_API cblas_idmin(const int n, double* imin, const double* x, const int x_inc); void PUBLIC_API cblas_icmin(const int n, - float2* imin, - const float2* x, const int x_inc); + void* imin, + const void* x, const int x_inc); void PUBLIC_API cblas_izmin(const int n, - double2* imin, - const double2* x, const int x_inc); + void* imin, + const void* x, const int x_inc); void PUBLIC_API cblas_ihmin(const int n, - half* imin, - const half* x, const int x_inc); + void* imin, + const void* x, const int x_inc); // ================================================================================================= // BLAS level-2 (matrix-vector) routines @@ -321,24 +321,24 @@ void PUBLIC_API cblas_dgemv(const Layout layout, const Transpose a_transpose, void PUBLIC_API cblas_cgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - const float2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - float2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_zgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - const double2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - double2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_hgemv(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const half* a, const int a_ld, - const half* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - half* y, const int y_inc); + void* y, const int y_inc); // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV/HGBMV void PUBLIC_API cblas_sgbmv(const Layout layout, const Transpose a_transpose, @@ -358,72 +358,72 @@ void PUBLIC_API cblas_dgbmv(const Layout layout, const Transpose a_transpose, void PUBLIC_API cblas_cgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, - const float2* a, const int a_ld, - const float2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - float2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_zgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, - const double2* a, const int a_ld, - const double2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - double2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_hgbmv(const Layout layout, const Transpose a_transpose, const int m, const int n, const int kl, const int ku, const void* alpha, - const half* a, const int a_ld, - const half* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - half* y, const int y_inc); + void* y, const int y_inc); // Hermitian matrix-vector multiplication: CHEMV/ZHEMV void PUBLIC_API cblas_chemv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const float2* a, const int a_ld, - const float2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - float2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_zhemv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const double2* a, const int a_ld, - const double2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - double2* y, const int y_inc); + void* y, const int y_inc); // Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV void PUBLIC_API cblas_chbmv(const Layout layout, const Triangle triangle, const int n, const int k, const void* alpha, - const float2* a, const int a_ld, - const float2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - float2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_zhbmv(const Layout layout, const Triangle triangle, const int n, const int k, const void* alpha, - const double2* a, const int a_ld, - const double2* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - double2* y, const int y_inc); + void* y, const int y_inc); // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV void PUBLIC_API cblas_chpmv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const float2* ap, - const float2* x, const int x_inc, + const void* ap, + const void* x, const int x_inc, const void* beta, - float2* y, const int y_inc); + void* y, const int y_inc); void PUBLIC_API cblas_zhpmv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const double2* ap, - const double2* x, const int x_inc, + const void* ap, + const void* x, const int x_inc, const void* beta, - double2* y, const int y_inc); + void* y, const int y_inc); // Symmetric matrix-vector multiplication: SSYMV/DSYMV/HSYMV void PUBLIC_API cblas_ssymv(const Layout layout, const Triangle triangle, @@ -443,10 +443,10 @@ void PUBLIC_API cblas_dsymv(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hsymv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* a, const int a_ld, - const half* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - half* y, const int y_inc); + void* y, const int y_inc); // Symmetric banded matrix-vector multiplication: SSBMV/DSBMV/HSBMV void PUBLIC_API cblas_ssbmv(const Layout layout, const Triangle triangle, @@ -466,10 +466,10 @@ void PUBLIC_API cblas_dsbmv(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hsbmv(const Layout layout, const Triangle triangle, const int n, const int k, const void* alpha, - const half* a, const int a_ld, - const half* x, const int x_inc, + const void* a, const int a_ld, + const void* x, const int x_inc, const void* beta, - half* y, const int y_inc); + void* y, const int y_inc); // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV/HSPMV void PUBLIC_API cblas_sspmv(const Layout layout, const Triangle triangle, @@ -489,10 +489,10 @@ void PUBLIC_API cblas_dspmv(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hspmv(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* ap, - const half* x, const int x_inc, + const void* ap, + const void* x, const int x_inc, const void* beta, - half* y, const int y_inc); + void* y, const int y_inc); // Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV/HTRMV void PUBLIC_API cblas_strmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -505,16 +505,16 @@ void PUBLIC_API cblas_dtrmv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const float2* a, const int a_ld, - float2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_ztrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const double2* a, const int a_ld, - double2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_htrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const half* a, const int a_ld, - half* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); // Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV/HTBMV void PUBLIC_API cblas_stbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -527,16 +527,16 @@ void PUBLIC_API cblas_dtbmv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, - const float2* a, const int a_ld, - float2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_ztbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, - const double2* a, const int a_ld, - double2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_htbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, - const half* a, const int a_ld, - half* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV/HTPMV void PUBLIC_API cblas_stpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -549,16 +549,16 @@ void PUBLIC_API cblas_dtpmv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const float2* ap, - float2* x, const int x_inc); + const void* ap, + void* x, const int x_inc); void PUBLIC_API cblas_ztpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const double2* ap, - double2* x, const int x_inc); + const void* ap, + void* x, const int x_inc); void PUBLIC_API cblas_htpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const half* ap, - half* x, const int x_inc); + const void* ap, + void* x, const int x_inc); // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV void PUBLIC_API cblas_strsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -571,12 +571,12 @@ void PUBLIC_API cblas_dtrsv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const float2* a, const int a_ld, - float2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_ztrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const double2* a, const int a_ld, - double2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); // Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV void PUBLIC_API cblas_stbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -589,12 +589,12 @@ void PUBLIC_API cblas_dtbsv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, - const float2* a, const int a_ld, - float2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); void PUBLIC_API cblas_ztbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, const int k, - const double2* a, const int a_ld, - double2* x, const int x_inc); + const void* a, const int a_ld, + void* x, const int x_inc); // Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV void PUBLIC_API cblas_stpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -607,12 +607,12 @@ void PUBLIC_API cblas_dtpsv(const Layout layout, const Triangle triangle, const double* x, const int x_inc); void PUBLIC_API cblas_ctpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const float2* ap, - float2* x, const int x_inc); + const void* ap, + void* x, const int x_inc); void PUBLIC_API cblas_ztpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int n, - const double2* ap, - double2* x, const int x_inc); + const void* ap, + void* x, const int x_inc); // General rank-1 matrix update: SGER/DGER/HGER void PUBLIC_API cblas_sger(const Layout layout, @@ -630,89 +630,89 @@ void PUBLIC_API cblas_dger(const Layout layout, void PUBLIC_API cblas_hger(const Layout layout, const int m, const int n, const void* alpha, - const half* x, const int x_inc, - const half* y, const int y_inc, - half* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); // General rank-1 complex matrix update: CGERU/ZGERU void PUBLIC_API cblas_cgeru(const Layout layout, const int m, const int n, const void* alpha, - const float2* x, const int x_inc, - const float2* y, const int y_inc, - float2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); void PUBLIC_API cblas_zgeru(const Layout layout, const int m, const int n, const void* alpha, - const double2* x, const int x_inc, - const double2* y, const int y_inc, - double2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); // General rank-1 complex conjugated matrix update: CGERC/ZGERC void PUBLIC_API cblas_cgerc(const Layout layout, const int m, const int n, const void* alpha, - const float2* x, const int x_inc, - const float2* y, const int y_inc, - float2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); void PUBLIC_API cblas_zgerc(const Layout layout, const int m, const int n, const void* alpha, - const double2* x, const int x_inc, - const double2* y, const int y_inc, - double2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); // Hermitian rank-1 matrix update: CHER/ZHER void PUBLIC_API cblas_cher(const Layout layout, const Triangle triangle, const int n, - const float alpha, - const float2* x, const int x_inc, - float2* a, const int a_ld); + const void* alpha, + const void* x, const int x_inc, + void* a, const int a_ld); void PUBLIC_API cblas_zher(const Layout layout, const Triangle triangle, const int n, - const double alpha, - const double2* x, const int x_inc, - double2* a, const int a_ld); + const void* alpha, + const void* x, const int x_inc, + void* a, const int a_ld); // Hermitian packed rank-1 matrix update: CHPR/ZHPR void PUBLIC_API cblas_chpr(const Layout layout, const Triangle triangle, const int n, - const float alpha, - const float2* x, const int x_inc, - float2* ap); + const void* alpha, + const void* x, const int x_inc, + void* ap); void PUBLIC_API cblas_zhpr(const Layout layout, const Triangle triangle, const int n, - const double alpha, - const double2* x, const int x_inc, - double2* ap); + const void* alpha, + const void* x, const int x_inc, + void* ap); // Hermitian rank-2 matrix update: CHER2/ZHER2 void PUBLIC_API cblas_cher2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const float2* x, const int x_inc, - const float2* y, const int y_inc, - float2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); void PUBLIC_API cblas_zher2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const double2* x, const int x_inc, - const double2* y, const int y_inc, - double2* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 void PUBLIC_API cblas_chpr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const float2* x, const int x_inc, - const float2* y, const int y_inc, - float2* ap); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* ap); void PUBLIC_API cblas_zhpr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const double2* x, const int x_inc, - const double2* y, const int y_inc, - double2* ap); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* ap); // Symmetric rank-1 matrix update: SSYR/DSYR/HSYR void PUBLIC_API cblas_ssyr(const Layout layout, const Triangle triangle, @@ -728,8 +728,8 @@ void PUBLIC_API cblas_dsyr(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hsyr(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* x, const int x_inc, - half* a, const int a_ld); + const void* x, const int x_inc, + void* a, const int a_ld); // Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR void PUBLIC_API cblas_sspr(const Layout layout, const Triangle triangle, @@ -745,8 +745,8 @@ void PUBLIC_API cblas_dspr(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hspr(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* x, const int x_inc, - half* ap); + const void* x, const int x_inc, + void* ap); // Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2 void PUBLIC_API cblas_ssyr2(const Layout layout, const Triangle triangle, @@ -764,9 +764,9 @@ void PUBLIC_API cblas_dsyr2(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hsyr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* x, const int x_inc, - const half* y, const int y_inc, - half* a, const int a_ld); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* a, const int a_ld); // Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2 void PUBLIC_API cblas_sspr2(const Layout layout, const Triangle triangle, @@ -784,9 +784,9 @@ void PUBLIC_API cblas_dspr2(const Layout layout, const Triangle triangle, void PUBLIC_API cblas_hspr2(const Layout layout, const Triangle triangle, const int n, const void* alpha, - const half* x, const int x_inc, - const half* y, const int y_inc, - half* ap); + const void* x, const int x_inc, + const void* y, const int y_inc, + void* ap); // ================================================================================================= // BLAS level-3 (matrix-matrix) routines @@ -810,24 +810,24 @@ void PUBLIC_API cblas_dgemm(const Layout layout, const Transpose a_transpose, co void PUBLIC_API cblas_cgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const void* alpha, - const float2* a, const int a_ld, - const float2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - float2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_zgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const void* alpha, - const double2* a, const int a_ld, - const double2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - double2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_hgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, const int m, const int n, const int k, const void* alpha, - const half* a, const int a_ld, - const half* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - half* c, const int c_ld); + void* c, const int c_ld); // Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM/HSYMM void PUBLIC_API cblas_ssymm(const Layout layout, const Side side, const Triangle triangle, @@ -847,40 +847,40 @@ void PUBLIC_API cblas_dsymm(const Layout layout, const Side side, const Triangle void PUBLIC_API cblas_csymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - const float2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - float2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_zsymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - const double2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - double2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_hsymm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, - const half* a, const int a_ld, - const half* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - half* c, const int c_ld); + void* c, const int c_ld); // Hermitian matrix-matrix multiplication: CHEMM/ZHEMM void PUBLIC_API cblas_chemm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - const float2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - float2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_zhemm(const Layout layout, const Side side, const Triangle triangle, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - const double2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - double2* c, const int c_ld); + void* c, const int c_ld); // Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK/HSYRK void PUBLIC_API cblas_ssyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, @@ -898,35 +898,35 @@ void PUBLIC_API cblas_dsyrk(const Layout layout, const Triangle triangle, const void PUBLIC_API cblas_csyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, - const float2* a, const int a_ld, + const void* a, const int a_ld, const void* beta, - float2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_zsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, - const double2* a, const int a_ld, + const void* a, const int a_ld, const void* beta, - double2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_hsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, const void* alpha, - const half* a, const int a_ld, + const void* a, const int a_ld, const void* beta, - half* c, const int c_ld); + void* c, const int c_ld); // Rank-K update of a hermitian matrix: CHERK/ZHERK void PUBLIC_API cblas_cherk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, - const float alpha, - const float2* a, const int a_ld, - const float beta, - float2* c, const int c_ld); + const void* alpha, + const void* a, const int a_ld, + const void* beta, + void* c, const int c_ld); void PUBLIC_API cblas_zherk(const Layout layout, const Triangle triangle, const Transpose a_transpose, const int n, const int k, - const double alpha, - const double2* a, const int a_ld, - const double beta, - double2* c, const int c_ld); + const void* alpha, + const void* a, const int a_ld, + const void* beta, + void* c, const int c_ld); // Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K/HSYR2K void PUBLIC_API cblas_ssyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, @@ -946,40 +946,40 @@ void PUBLIC_API cblas_dsyr2k(const Layout layout, const Triangle triangle, const void PUBLIC_API cblas_csyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, - const float2* a, const int a_ld, - const float2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - float2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_zsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, - const double2* a, const int a_ld, - const double2* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - double2* c, const int c_ld); + void* c, const int c_ld); void PUBLIC_API cblas_hsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, - const half* a, const int a_ld, - const half* b, const int b_ld, + const void* a, const int a_ld, + const void* b, const int b_ld, const void* beta, - half* c, const int c_ld); + void* c, const int c_ld); // Rank-2K update of a hermitian matrix: CHER2K/ZHER2K void PUBLIC_API cblas_cher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, - const float2* a, const int a_ld, - const float2* b, const int b_ld, - const float beta, - float2* c, const int c_ld); + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld); void PUBLIC_API cblas_zher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose, const int n, const int k, const void* alpha, - const double2* a, const int a_ld, - const double2* b, const int b_ld, - const double beta, - double2* c, const int c_ld); + const void* a, const int a_ld, + const void* b, const int b_ld, + const void* beta, + void* c, const int c_ld); // Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM/HTRMM void PUBLIC_API cblas_strmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -995,18 +995,18 @@ void PUBLIC_API cblas_dtrmm(const Layout layout, const Side side, const Triangle void PUBLIC_API cblas_ctrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - float2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_ztrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - double2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_htrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const half* a, const int a_ld, - half* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); // Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM/HTRSM void PUBLIC_API cblas_strsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, @@ -1022,18 +1022,18 @@ void PUBLIC_API cblas_dtrsm(const Layout layout, const Side side, const Triangle void PUBLIC_API cblas_ctrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - float2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_ztrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - double2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_htrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, const int m, const int n, const void* alpha, - const half* a, const int a_ld, - half* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); // ================================================================================================= // Extra non-BLAS routines (level-X) @@ -1053,18 +1053,18 @@ void PUBLIC_API cblas_domatcopy(const Layout layout, const Transpose a_transpose void PUBLIC_API cblas_comatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const float2* a, const int a_ld, - float2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_zomatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const double2* a, const int a_ld, - double2* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); void PUBLIC_API cblas_homatcopy(const Layout layout, const Transpose a_transpose, const int m, const int n, const void* alpha, - const half* a, const int a_ld, - half* b, const int b_ld); + const void* a, const int a_ld, + void* b, const int b_ld); half* b, const size_t b_offset, const size_t b_ld); // ================================================================================================= diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py index 29acc744..01f32dd8 100644 --- a/scripts/generator/generator/datatype.py +++ b/scripts/generator/generator/datatype.py @@ -65,10 +65,9 @@ class DataType: return ((scalar == "alpha" and self.alpha_cpp in [D_FLOAT2, D_DOUBLE2]) or (scalar == "beta" and self.beta_cpp in [D_FLOAT2, D_DOUBLE2])) - def is_non_standard(self, scalar): - """Current scalar is of a non-standard type""" - return ((scalar == "alpha" and self.alpha_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2]) or - (scalar == "beta" and self.beta_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2])) + def is_non_standard(self): + """Current type is of a non-standard type""" + return self.buffer_type in [D_HALF, D_FLOAT2, D_DOUBLE2] # Regular data-types diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 4870b861..126d64ce 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -182,7 +182,7 @@ class Routine: """As above but as plain C pointer""" prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: - data_type = "void" if flavour.is_non_standard(name) else flavour.buffer_type + data_type = "void" if flavour.is_non_standard() else flavour.buffer_type a = [prefix + data_type + "* " + name + ""] c = ["const int " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else [] return [", ".join(a + c)] @@ -317,9 +317,9 @@ class Routine: """Retrieves the definition of a scalar (alpha/beta) but make it a void pointer in case of non-standard types""" if name in self.scalars: if name == "alpha": - data_type = "void*" if flavour.is_non_standard(name) else flavour.alpha_cpp + data_type = "void*" if flavour.is_non_standard() else flavour.alpha_cpp return ["const " + data_type + " " + name] - data_type = "void*" if flavour.is_non_standard(name) else flavour.beta_cpp + data_type = "void*" if flavour.is_non_standard() else flavour.beta_cpp return ["const " + data_type + " " + name] return [] -- cgit v1.2.3