From a9c25e9fd26d3b40a89a34e9d6f2da2f547adcd0 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Apr 2017 15:21:19 +0200 Subject: Factored out inclusion of clBLAS and CBLAS from the test-routine files --- test/routines/common.hpp | 32 ++++++++++++++++++++++++++++++++ test/routines/level1/xamax.hpp | 10 +--------- test/routines/level1/xasum.hpp | 10 +--------- test/routines/level1/xaxpy.hpp | 10 +--------- test/routines/level1/xcopy.hpp | 10 +--------- test/routines/level1/xdot.hpp | 10 +--------- test/routines/level1/xdotc.hpp | 10 +--------- test/routines/level1/xdotu.hpp | 10 +--------- test/routines/level1/xnrm2.hpp | 10 +--------- test/routines/level1/xscal.hpp | 10 +--------- test/routines/level1/xswap.hpp | 10 +--------- test/routines/level2/xgbmv.hpp | 10 +--------- test/routines/level2/xgemv.hpp | 10 +--------- test/routines/level2/xger.hpp | 10 +--------- test/routines/level2/xgerc.hpp | 10 +--------- test/routines/level2/xgeru.hpp | 10 +--------- test/routines/level2/xhbmv.hpp | 10 +--------- test/routines/level2/xhemv.hpp | 10 +--------- test/routines/level2/xher.hpp | 10 +--------- test/routines/level2/xher2.hpp | 10 +--------- test/routines/level2/xhpmv.hpp | 10 +--------- test/routines/level2/xhpr.hpp | 10 +--------- test/routines/level2/xhpr2.hpp | 10 +--------- test/routines/level2/xsbmv.hpp | 10 +--------- test/routines/level2/xspmv.hpp | 10 +--------- test/routines/level2/xspr.hpp | 10 +--------- test/routines/level2/xspr2.hpp | 10 +--------- test/routines/level2/xsymv.hpp | 10 +--------- test/routines/level2/xsyr.hpp | 10 +--------- test/routines/level2/xsyr2.hpp | 10 +--------- test/routines/level2/xtbmv.hpp | 10 +--------- test/routines/level2/xtpmv.hpp | 10 +--------- test/routines/level2/xtrmv.hpp | 10 +--------- test/routines/level2/xtrsv.hpp | 10 +--------- test/routines/level3/xgemm.hpp | 10 +--------- test/routines/level3/xhemm.hpp | 10 +--------- test/routines/level3/xher2k.hpp | 10 +--------- test/routines/level3/xherk.hpp | 10 
+--------- test/routines/level3/xsymm.hpp | 10 +--------- test/routines/level3/xsyr2k.hpp | 10 +--------- test/routines/level3/xsyrk.hpp | 10 +--------- test/routines/level3/xtrmm.hpp | 10 +--------- test/routines/levelx/xgemmbatched.hpp | 10 +--------- 43 files changed, 74 insertions(+), 378 deletions(-) create mode 100644 test/routines/common.hpp (limited to 'test') diff --git a/test/routines/common.hpp b/test/routines/common.hpp new file mode 100644 index 00000000..0d516a0e --- /dev/null +++ b/test/routines/common.hpp @@ -0,0 +1,32 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains all the common includes for the clients and tests +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_COMMON_H_ +#define CLBLAST_TEST_ROUTINES_COMMON_H_ + +#include +#include + +#include "utilities/utilities.hpp" + +#ifdef CLBLAST_REF_CLBLAS + #include "test/wrapper_clblas.hpp" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "test/wrapper_cblas.hpp" +#endif + +// ================================================================================================= + +// CLBLAST_TEST_ROUTINES_COMMON_H_ +#endif diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp index 2e844f2c..fccefc73 100644 --- a/test/routines/level1/xamax.hpp +++ b/test/routines/level1/xamax.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAMAX_H_ #define CLBLAST_TEST_ROUTINES_XAMAX_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include 
"test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp index 8488bfc6..f0fca4d3 100644 --- a/test/routines/level1/xasum.hpp +++ b/test/routines/level1/xasum.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XASUM_H_ #define CLBLAST_TEST_ROUTINES_XASUM_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index cc7d251a..8426d739 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAXPY_H_ #define CLBLAST_TEST_ROUTINES_XAXPY_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp index 0dbf0f3d..d1e7f49e 100644 --- a/test/routines/level1/xcopy.hpp +++ b/test/routines/level1/xcopy.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XCOPY_H_ #define CLBLAST_TEST_ROUTINES_XCOPY_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdot.hpp 
b/test/routines/level1/xdot.hpp index bdf2e721..cb3d7979 100644 --- a/test/routines/level1/xdot.hpp +++ b/test/routines/level1/xdot.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XDOT_H_ #define CLBLAST_TEST_ROUTINES_XDOT_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp index 2cc71b93..10ecbda6 100644 --- a/test/routines/level1/xdotc.hpp +++ b/test/routines/level1/xdotc.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XDOTC_H_ #define CLBLAST_TEST_ROUTINES_XDOTC_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp index 272e1e31..6efd270e 100644 --- a/test/routines/level1/xdotu.hpp +++ b/test/routines/level1/xdotu.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XDOTU_H_ #define CLBLAST_TEST_ROUTINES_XDOTU_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp index cb1ec683..0ba24b13 100644 --- a/test/routines/level1/xnrm2.hpp +++ b/test/routines/level1/xnrm2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XNRM2_H_ 
#define CLBLAST_TEST_ROUTINES_XNRM2_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp index 3e6b9a38..e7db434e 100644 --- a/test/routines/level1/xscal.hpp +++ b/test/routines/level1/xscal.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSCAL_H_ #define CLBLAST_TEST_ROUTINES_XSCAL_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp index d9b84dc4..64feb744 100644 --- a/test/routines/level1/xswap.hpp +++ b/test/routines/level1/xswap.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSWAP_H_ #define CLBLAST_TEST_ROUTINES_XSWAP_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index 990ef49f..fb36d7f2 100644 --- a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGBMV_H_ #define CLBLAST_TEST_ROUTINES_XGBMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include 
"test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index a007cb62..4654838e 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMV_H_ #define CLBLAST_TEST_ROUTINES_XGEMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 5c131e2d..9d1dec13 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGER_H_ #define CLBLAST_TEST_ROUTINES_XGER_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index e3544424..efa72744 100644 --- a/test/routines/level2/xgerc.hpp +++ b/test/routines/level2/xgerc.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGERC_H_ #define CLBLAST_TEST_ROUTINES_XGERC_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff 
--git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index 1d81e292..cb14636e 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGERU_H_ #define CLBLAST_TEST_ROUTINES_XGERU_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index 21194fd6..f41cc572 100644 --- a/test/routines/level2/xhbmv.hpp +++ b/test/routines/level2/xhbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHBMV_H_ #define CLBLAST_TEST_ROUTINES_XHBMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index ffef8ff8..9f5aca00 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHEMV_H_ #define CLBLAST_TEST_ROUTINES_XHEMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index 083bd3fc..ef0afd1c 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -16,15 +16,7 @@ 
#ifndef CLBLAST_TEST_ROUTINES_XHER_H_ #define CLBLAST_TEST_ROUTINES_XHER_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index 7bd890a5..d4b06c49 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHER2_H_ #define CLBLAST_TEST_ROUTINES_XHER2_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index 285dd6d3..52f70dc9 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPMV_H_ #define CLBLAST_TEST_ROUTINES_XHPMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 88bae86b..39112e49 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPR_H_ #define CLBLAST_TEST_ROUTINES_XHPR_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - 
#include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index cd10fa00..21f0970a 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHPR2_H_ #define CLBLAST_TEST_ROUTINES_XHPR2_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index 5c70aba5..94e60dd2 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSBMV_H_ #define CLBLAST_TEST_ROUTINES_XSBMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index 560f5baa..02bfd4e3 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPMV_H_ #define CLBLAST_TEST_ROUTINES_XSPMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // 
================================================================================================= diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 2e12db33..9d992eb2 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPR_H_ #define CLBLAST_TEST_ROUTINES_XSPR_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index a7e22227..520bf412 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSPR2_H_ #define CLBLAST_TEST_ROUTINES_XSPR2_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index d9cf9c1e..130fee49 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYMV_H_ #define CLBLAST_TEST_ROUTINES_XSYMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index b60c3a36..2eb07f9b 
100644 --- a/test/routines/level2/xsyr.hpp +++ b/test/routines/level2/xsyr.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR_H_ #define CLBLAST_TEST_ROUTINES_XSYR_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index dd10a3d0..5c3598c5 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR2_H_ #define CLBLAST_TEST_ROUTINES_XSYR2_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index 7eb8ce9e..7ef67424 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTBMV_H_ #define CLBLAST_TEST_ROUTINES_XTBMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index 7f4842f0..6cea7061 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTPMV_H_ #define CLBLAST_TEST_ROUTINES_XTPMV_H_ -#include -#include 
- -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index cb7527ed..7c97c966 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRMV_H_ #define CLBLAST_TEST_ROUTINES_XTRMV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 63d34758..18a3cef5 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_ #define CLBLAST_TEST_ROUTINES_XTRSV_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index a33cbfec..d6ad98f9 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMM_H_ #define CLBLAST_TEST_ROUTINES_XGEMM_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace 
clblast { // ================================================================================================= diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index 74029c7e..beadf62d 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHEMM_H_ #define CLBLAST_TEST_ROUTINES_XHEMM_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index ea13bbc1..b5d22579 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHER2K_H_ #define CLBLAST_TEST_ROUTINES_XHER2K_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index b1ce83e0..558f4e76 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XHERK_H_ #define CLBLAST_TEST_ROUTINES_XHERK_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp 
index 6ab644b8..704a8f9e 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYMM_H_ #define CLBLAST_TEST_ROUTINES_XSYMM_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 1400c4e2..c321b9cf 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYR2K_H_ #define CLBLAST_TEST_ROUTINES_XSYR2K_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 2df8d6b0..00a3013d 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XSYRK_H_ #define CLBLAST_TEST_ROUTINES_XSYRK_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 84adc6e0..660001df 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRMM_H_ #define 
CLBLAST_TEST_ROUTINES_XTRMM_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp index ab5f20c5..e13e9382 100644 --- a/test/routines/levelx/xgemmbatched.hpp +++ b/test/routines/levelx/xgemmbatched.hpp @@ -16,15 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_ #define CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_ -#include -#include - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= -- cgit v1.2.3 From c5461d77e58baf4776bed136bf8c682decf8134e Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Apr 2017 15:24:21 +0200 Subject: Factored out inclusion of clBLAS and CBLAS from the test-routine files --- test/routines/level3/xtrsm.hpp | 11 +---------- test/routines/levelx/xaxpybatched.hpp | 12 +----------- 2 files changed, 2 insertions(+), 21 deletions(-) (limited to 'test') diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index de5b307d..9e8b9565 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -16,18 +16,9 @@ #ifndef CLBLAST_TEST_ROUTINES_XTRSM_H_ #define CLBLAST_TEST_ROUTINES_XTRSM_H_ -#include -#include - +#include "test/routines/common.hpp" #include "test/routines/level3/xtrsm_data.hpp" -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif - namespace clblast { // 
================================================================================================= diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp index 05141bbb..d8b3837c 100644 --- a/test/routines/levelx/xaxpybatched.hpp +++ b/test/routines/levelx/xaxpybatched.hpp @@ -16,17 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ #define CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_ -#include -#include - -#include "utilities/utilities.hpp" - -#ifdef CLBLAST_REF_CLBLAS - #include "test/wrapper_clblas.hpp" -#endif -#ifdef CLBLAST_REF_CBLAS - #include "test/wrapper_cblas.hpp" -#endif +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= -- cgit v1.2.3 From b24d36474334a74c286ffddc6af8fb74a3bae445 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 2 Apr 2017 18:06:15 +0200 Subject: Layed the groundwork for cuBLAS comparisons in the clients --- CMakeLists.txt | 27 ++++++++--- src/utilities/utilities.hpp | 2 + test/correctness/tester.cpp | 38 ++++++++++----- test/correctness/tester.hpp | 1 + test/performance/client.cpp | 35 +++++++++++--- test/performance/client.hpp | 12 ++++- test/wrapper_cuda.hpp | 111 ++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 200 insertions(+), 26 deletions(-) create mode 100644 test/wrapper_cuda.hpp (limited to 'test') diff --git a/CMakeLists.txt b/CMakeLists.txt index 62cf00cc..d9af0b91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,17 +130,23 @@ if(TUNERS) endif() # Locates the reference BLAS libraries in case the tests need to be compiled. The "FindclBLAS.cmake" -# and "FindCBLAS.cmake" are included. +# and "FindCBLAS.cmake" are included, "FindCUDA.cmake" is provided by CMake. 
if(CLIENTS OR TESTS) find_package(clBLAS) find_package(CBLAS) - if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND) + find_package(CUDA QUIET) # for cuBLAS + if(CUDA_FOUND) + message(STATUS "CUDA and cuBLAS found") + else() + message(STATUS "Could not find cuBLAS as a reference") + endif() + if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND AND NOT CUDA_FOUND) if(TESTS) - message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, disabling the compilation of the tests") set(TESTS OFF) endif() if(CLIENTS) - message(STATUS "Could NOT find clBLAS nor a CPU BLAS, head-to-head performance comparison not supported in the clients") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, head-to-head performance comparison not supported in the clients") endif() endif() endif() @@ -320,13 +326,22 @@ if(CLIENTS OR TESTS) add_definitions(" -DCLBLAST_REF_CBLAS") endif() endif() + if(CUDA_FOUND) + set(REF_INCLUDES ${REF_INCLUDES} ${CUDA_INCLUDE_DIRS}) + set(REF_LIBRARIES ${REF_LIBRARIES} ${CUDA_LIBRARIES} ${CUDA_CUBLAS_LIBRARIES}) + if(MSVC) + add_definitions(" /DCLBLAST_REF_CUBLAS") + else() + add_definitions(" -DCLBLAST_REF_CUBLAS") + endif() + endif() endif() # ================================================================================================== # Section for the performance tests (i.e. the client). These compare against optionally a reference -# library, either clBLAS or a CPU BLAS. +# library, either clBLAS, a CPU BLAS, or CUDA's cuBLAS. if(CLIENTS) # Visual Studio requires the sources of non-exported objects/libraries @@ -372,7 +387,7 @@ endif() # ================================================================================================== # Section for the correctness tests. Note that these tests require the presence of clBLAS and/or a -# CPU BLAS library to act as a reference. +# CPU BLAS library, and/or cuBLAS to act as a reference. 
if(TESTS) enable_testing() diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 535560a3..7aadb983 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -81,6 +81,7 @@ constexpr auto kArgFraction = "fraction"; // The client-specific arguments in string form constexpr auto kArgCompareclblas = "clblas"; constexpr auto kArgComparecblas = "cblas"; +constexpr auto kArgComparecublas = "cublas"; constexpr auto kArgStepSize = "step"; constexpr auto kArgNumSteps = "num_steps"; constexpr auto kArgNumRuns = "runs"; @@ -188,6 +189,7 @@ struct Arguments { // Client-specific arguments int compare_clblas = 1; int compare_cblas = 1; + int compare_cublas = 1; size_t step = 1; size_t num_steps = 0; size_t num_runs = 10; diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index 40784fdb..b352c1aa 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -116,24 +116,38 @@ Tester::Tester(const std::vector &arguments, const bool silent tests_failed_{0} { options_ = options; + // Determines which reference is the default + auto default_clblas = 0; + auto default_cblas = 0; + auto default_cublas = 0; + #if defined(CLBLAST_REF_CBLAS) + default_cblas = 1; + #elif defined(CLBLAST_REF_CLBLAS) + default_clblas = 1; + #elif defined(CLBLAST_REF_CUBLAS) + default_cublas = 1; + #endif + // Determines which reference to test against - #if defined(CLBLAST_REF_CLBLAS) && defined(CLBLAST_REF_CBLAS) - compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 0); - compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1); - #elif CLBLAST_REF_CLBLAS - compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 1); - compare_cblas_ = 0; - #elif CLBLAST_REF_CBLAS - compare_clblas_ = 0; - compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1); - #else - compare_clblas_ = 0; - compare_cblas_ = 0; + compare_clblas_ = 0; + compare_cblas_ = 0; + compare_cublas_ = 0; + #if 
defined(CLBLAST_REF_CBLAS) + compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, default_cblas); + #endif + #if defined(CLBLAST_REF_CLBLAS) + compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, default_clblas); + #endif + #if defined(CLBLAST_REF_CUBLAS) + compare_cublas_ = GetArgument(arguments, help_, kArgComparecublas, default_cublas); #endif // Prints the help message (command-line arguments) if (!silent) { fprintf(stdout, "\n* %s\n", help_.c_str()); } + // Support for cuBLAS not available yet + if (compare_cublas_) { throw std::runtime_error("Cannot test against cuBLAS; not implemented yet"); } + // Can only test against a single reference (not two, not zero) if (compare_clblas_ && compare_cblas_) { throw std::runtime_error("Cannot test against both clBLAS and CBLAS references; choose one using the -cblas and -clblas arguments"); diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp index f60be04b..8cfa702f 100644 --- a/test/correctness/tester.hpp +++ b/test/correctness/tester.hpp @@ -113,6 +113,7 @@ class Tester { // Testing against reference implementations int compare_cblas_; int compare_clblas_; + int compare_cublas_; private: diff --git a/test/performance/client.cpp b/test/performance/client.cpp index 48d6708e..a2f0f9f4 100644 --- a/test/performance/client.cpp +++ b/test/performance/client.cpp @@ -30,13 +30,14 @@ template const int Client::kSeed = 42; // fixed se template Client::Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2, - const std::vector &options, + const Reference3 run_reference3, const std::vector &options, const std::vector &buffers_in, const std::vector &buffers_out, const GetMetric get_flops, const GetMetric get_bytes): run_routine_(run_routine), run_reference1_(run_reference1), run_reference2_(run_reference2), + run_reference3_(run_reference3), options_(options), buffers_in_(buffers_in), buffers_out_(buffers_out), @@ -119,6 +120,11 @@ Arguments 
Client::ParseArguments(int argc, char *argv[], const size_t le #else args.compare_cblas = 0; #endif + #ifdef CLBLAST_REF_CUBLAS + args.compare_cublas = GetArgument(command_line_args, help, kArgComparecublas, 1); + #else + args.compare_cublas = 0; + #endif args.step = GetArgument(command_line_args, help, kArgStepSize, size_t{1}); args.num_steps = GetArgument(command_line_args, help, kArgNumSteps, size_t{0}); args.num_runs = GetArgument(command_line_args, help, kArgNumRuns, size_t{10}); @@ -133,24 +139,26 @@ Arguments Client::ParseArguments(int argc, char *argv[], const size_t le // Comparison against a non-BLAS routine is not supported if (level == 4) { // level-4 == level-X - if (args.compare_clblas != 0 || args.compare_cblas != 0) { + if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) { if (!args.silent) { - fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for this non-BLAS routine\n\n"); + fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for this non-BLAS routine\n\n"); } } args.compare_clblas = 0; args.compare_cblas = 0; + args.compare_cublas = 0; } - // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision + // Comparison against other BLAS libraries is not supported in case of half-precision if (args.precision == Precision::kHalf) { - if (args.compare_clblas != 0 || args.compare_cblas != 0) { + if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) { if (!args.silent) { - fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n"); + fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for half-precision\n\n"); } } args.compare_clblas = 0; args.compare_cblas = 0; + args.compare_cublas = 0; } // Returns the arguments @@ -174,6 +182,9 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) #ifdef CLBLAST_REF_CLBLAS if (args.compare_clblas) { clblasSetup(); } #endif + #ifdef 
CLBLAST_REF_CUBLAS + cudaSetDevice(static_cast(args.device_id)); + #endif // Iterates over all "num_step" values jumping by "step" each time auto s = size_t{0}; @@ -232,6 +243,16 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) HostToDevice(args, buffers, buffers_host, queue, buffers_out_); timings.push_back(std::pair("CPU BLAS", ms_cblas)); } + if (args.compare_cublas) { + auto buffers_host = BuffersHost(); + auto buffers_cuda = BuffersCUDA(); + DeviceToHost(args, buffers, buffers_host, queue, buffers_in_); + HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_); + auto ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS"); + CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_); + HostToDevice(args, buffers, buffers_host, queue, buffers_out_); + timings.push_back(std::pair("cuBLAS", ms_cublas)); + } // Prints the performance of the tested libraries PrintTableRow(args, timings); @@ -307,6 +328,7 @@ void Client::PrintTableHeader(const Arguments& args) { fprintf(stdout, " | <-- CLBlast -->"); if (args.compare_clblas) { fprintf(stdout, " | <-- clBLAS -->"); } if (args.compare_cblas) { fprintf(stdout, " | <-- CPU BLAS -->"); } + if (args.compare_cublas) { fprintf(stdout, " | <-- cuBLAS -->"); } fprintf(stdout, " |\n"); } @@ -315,6 +337,7 @@ void Client::PrintTableHeader(const Arguments& args) { fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1"); if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); } if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); } + if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); } fprintf(stdout, "\n"); } diff --git a/test/performance/client.hpp b/test/performance/client.hpp index 12fd113d..47a13017 100644 --- a/test/performance/client.hpp +++ b/test/performance/client.hpp @@ -31,6 +31,7 @@ #ifdef CLBLAST_REF_CLBLAS #include #endif +#include 
"test/wrapper_cuda.hpp" #include "clblast.h" namespace clblast { @@ -46,12 +47,13 @@ class Client { using Routine = std::function&, Buffers&, Queue&)>; using Reference1 = std::function&, Buffers&, Queue&)>; using Reference2 = std::function&, BuffersHost&, Queue&)>; + using Reference3 = std::function&, BuffersCUDA&, Queue&)>; using SetMetric = std::function&)>; using GetMetric = std::function&)>; // The constructor Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2, - const std::vector &options, + const Reference3 run_reference3, const std::vector &options, const std::vector &buffers_in, const std::vector &buffers_out, const GetMetric get_flops, const GetMetric get_bytes); @@ -84,6 +86,7 @@ class Client { const Routine run_routine_; const Reference1 run_reference1_; const Reference2 run_reference2_; + const Reference3 run_reference3_; const std::vector options_; const std::vector buffers_in_; const std::vector buffers_out_; @@ -118,9 +121,14 @@ void RunClient(int argc, char *argv[]) { #else auto reference2 = ReferenceNotAvailable>; #endif + #ifdef CLBLAST_REF_CUBLAS + auto reference3 = C::RunReference3; // cuBLAS when available + #else + auto reference3 = ReferenceNotAvailable>; + #endif // Creates a new client - auto client = Client(C::RunRoutine, reference1, reference2, C::GetOptions(), + auto client = Client(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(), C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes); // Simple command line argument parser with defaults diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp new file mode 100644 index 00000000..0f24d0d9 --- /dev/null +++ b/test/wrapper_cuda.hpp @@ -0,0 +1,111 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. 
This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file contains all the CUDA related code; used only in case of testing against cuBLAS +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_WRAPPER_CUDA_H_ +#define CLBLAST_TEST_WRAPPER_CUDA_H_ + +#include +#include +#include +#include + +#include "utilities/utilities.hpp" + +#ifdef CLBLAST_REF_CUBLAS + #include + #include +#endif + +namespace clblast { +// ================================================================================================= + +// Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards +#ifdef CLBLAST_REF_CUBLAS + template + void CUDAToHost(const T* buffer_cuda, const std::vector &buffer_host, const size_t size) { + cudaMemcpy( + std::reinterpret_cast(buffer_host.data()), + std::reinterpret_cast(buffer_cuda), + size*sizeof(T), + cudaMemcpyDeviceToHost + ); + cudaFree(buffer_cuda); +} +#else + template void CUDAToHost(const T*, const std::vector&, const size_t) { } +#endif + +// Allocates space on the CUDA device and copies in data from the host +#ifdef CLBLAST_REF_CUBLAS + template + void HostToCUDA(const T* buffer_cuda, const std::vector &buffer_host, const size_t size) { + cudaMalloc(std::reinterpret_cast&buffer_cuda, size*sizeof(T)); + cudaMemcpy( + std::reinterpret_cast(buffer_cuda), + std::reinterpret_cast(buffer_host.data()), + size*sizeof(T), + cudaMemcpyHostToDevice + ); + } +#else + template void HostToCUDA(const T*, const std::vector&, const size_t) { } +#endif + +// ================================================================================================= + +template +struct BuffersCUDA { + T* x_vec; + T* y_vec; + T* a_mat; + T* b_mat; + T* c_mat; + T* ap_mat; + T* scalar; +}; + +template +void CUDAToHost(const Arguments 
&args, BuffersCUDA &buffers, BuffersHost &buffers_host, + const std::vector &names) { + for (auto &name: names) { + if (name == kBufVecX) { buffers_host.x_vec = std::vector(args.x_size, static_cast(0)); CUDAToHost(buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { buffers_host.y_vec = std::vector(args.y_size, static_cast(0)); CUDAToHost(buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { buffers_host.a_mat = std::vector(args.a_size, static_cast(0)); CUDAToHost(buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { buffers_host.b_mat = std::vector(args.b_size, static_cast(0)); CUDAToHost(buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { buffers_host.c_mat = std::vector(args.c_size, static_cast(0)); CUDAToHost(buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { buffers_host.ap_mat = std::vector(args.ap_size, static_cast(0)); CUDAToHost(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } + else if (name == kBufScalar) { buffers_host.scalar = std::vector(args.scalar_size, static_cast(0)); CUDAToHost(buffers.scalar, buffers_host.scalar, args.scalar_size); } + else { throw std::runtime_error("Invalid buffer name"); } + } +} + +template +void HostToCUDA(const Arguments &args, BuffersCUDA &buffers, BuffersHost &buffers_host, + const std::vector &names) { + for (auto &name: names) { + if (name == kBufVecX) { HostToCUDA(buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { HostToCUDA(buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { HostToCUDA(buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { HostToCUDA(buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { HostToCUDA(buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { HostToCUDA(buffers.ap_mat, buffers_host.ap_mat, 
args.ap_size); } + else if (name == kBufScalar) { HostToCUDA(buffers.scalar, buffers_host.scalar, args.scalar_size); } + else { throw std::runtime_error("Invalid buffer name"); } + } +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_WRAPPER_CUDA_H_ +#endif -- cgit v1.2.3 From eb1fda2729c4022493aa874e3fe81d2a270085a1 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 3 Apr 2017 21:44:35 +0200 Subject: In-lined the float2 and double2 types to avoid collision with CUDA's definitions --- scripts/generator/generator/cpp.py | 6 ------ scripts/generator/generator/datatype.py | 6 ++++-- test/correctness/routines/level1/xamax.cpp | 8 ++------ test/correctness/routines/level1/xasum.cpp | 8 ++------ test/correctness/routines/level1/xaxpy.cpp | 8 ++------ test/correctness/routines/level1/xcopy.cpp | 8 ++------ test/correctness/routines/level1/xdot.cpp | 4 ---- test/correctness/routines/level1/xdotc.cpp | 8 ++------ test/correctness/routines/level1/xdotu.cpp | 8 ++------ test/correctness/routines/level1/xnrm2.cpp | 8 ++------ test/correctness/routines/level1/xrot.cpp | 4 ---- test/correctness/routines/level1/xrotg.cpp | 4 ---- test/correctness/routines/level1/xrotm.cpp | 4 ---- test/correctness/routines/level1/xrotmg.cpp | 4 ---- test/correctness/routines/level1/xscal.cpp | 8 ++------ test/correctness/routines/level1/xswap.cpp | 8 ++------ test/correctness/routines/level2/xgbmv.cpp | 8 ++------ test/correctness/routines/level2/xgemv.cpp | 8 ++------ test/correctness/routines/level2/xger.cpp | 4 ---- test/correctness/routines/level2/xgerc.cpp | 8 ++------ test/correctness/routines/level2/xgeru.cpp | 8 ++------ test/correctness/routines/level2/xhbmv.cpp | 8 ++------ test/correctness/routines/level2/xhemv.cpp | 8 ++------ test/correctness/routines/level2/xher.cpp | 8 ++------ test/correctness/routines/level2/xher2.cpp | 8 ++------ test/correctness/routines/level2/xhpmv.cpp | 
8 ++------ test/correctness/routines/level2/xhpr.cpp | 8 ++------ test/correctness/routines/level2/xhpr2.cpp | 8 ++------ test/correctness/routines/level2/xsbmv.cpp | 4 ---- test/correctness/routines/level2/xspmv.cpp | 4 ---- test/correctness/routines/level2/xspr.cpp | 4 ---- test/correctness/routines/level2/xspr2.cpp | 4 ---- test/correctness/routines/level2/xsymv.cpp | 4 ---- test/correctness/routines/level2/xsyr.cpp | 4 ---- test/correctness/routines/level2/xsyr2.cpp | 4 ---- test/correctness/routines/level2/xtbmv.cpp | 8 ++------ test/correctness/routines/level2/xtbsv.cpp | 8 ++------ test/correctness/routines/level2/xtpmv.cpp | 8 ++------ test/correctness/routines/level2/xtpsv.cpp | 8 ++------ test/correctness/routines/level2/xtrmv.cpp | 8 ++------ test/correctness/routines/level2/xtrsv.cpp | 8 ++------ test/correctness/routines/level3/xgemm.cpp | 8 ++------ test/correctness/routines/level3/xhemm.cpp | 8 ++------ test/correctness/routines/level3/xher2k.cpp | 8 ++------ test/correctness/routines/level3/xherk.cpp | 8 ++------ test/correctness/routines/level3/xsymm.cpp | 8 ++------ test/correctness/routines/level3/xsyr2k.cpp | 8 ++------ test/correctness/routines/level3/xsyrk.cpp | 8 ++------ test/correctness/routines/level3/xtrmm.cpp | 8 ++------ test/correctness/routines/level3/xtrsm.cpp | 8 ++------ test/correctness/routines/levelx/xaxpybatched.cpp | 8 ++------ test/correctness/routines/levelx/xgemmbatched.cpp | 8 ++------ test/correctness/routines/levelx/xomatcopy.cpp | 8 ++------ test/performance/routines/level1/xamax.cpp | 8 ++------ test/performance/routines/level1/xasum.cpp | 8 ++------ test/performance/routines/level1/xaxpy.cpp | 8 ++------ test/performance/routines/level1/xcopy.cpp | 8 ++------ test/performance/routines/level1/xdot.cpp | 4 ---- test/performance/routines/level1/xdotc.cpp | 8 ++------ test/performance/routines/level1/xdotu.cpp | 8 ++------ test/performance/routines/level1/xnrm2.cpp | 8 ++------ test/performance/routines/level1/xrot.cpp | 
4 ---- test/performance/routines/level1/xrotg.cpp | 4 ---- test/performance/routines/level1/xrotm.cpp | 4 ---- test/performance/routines/level1/xrotmg.cpp | 4 ---- test/performance/routines/level1/xscal.cpp | 8 ++------ test/performance/routines/level1/xswap.cpp | 8 ++------ test/performance/routines/level2/xgbmv.cpp | 8 ++------ test/performance/routines/level2/xgemv.cpp | 8 ++------ test/performance/routines/level2/xger.cpp | 4 ---- test/performance/routines/level2/xgerc.cpp | 8 ++------ test/performance/routines/level2/xgeru.cpp | 8 ++------ test/performance/routines/level2/xhbmv.cpp | 8 ++------ test/performance/routines/level2/xhemv.cpp | 8 ++------ test/performance/routines/level2/xher.cpp | 8 ++------ test/performance/routines/level2/xher2.cpp | 8 ++------ test/performance/routines/level2/xhpmv.cpp | 8 ++------ test/performance/routines/level2/xhpr.cpp | 8 ++------ test/performance/routines/level2/xhpr2.cpp | 8 ++------ test/performance/routines/level2/xsbmv.cpp | 4 ---- test/performance/routines/level2/xspmv.cpp | 4 ---- test/performance/routines/level2/xspr.cpp | 4 ---- test/performance/routines/level2/xspr2.cpp | 4 ---- test/performance/routines/level2/xsymv.cpp | 4 ---- test/performance/routines/level2/xsyr.cpp | 4 ---- test/performance/routines/level2/xsyr2.cpp | 4 ---- test/performance/routines/level2/xtbmv.cpp | 8 ++------ test/performance/routines/level2/xtbsv.cpp | 8 ++------ test/performance/routines/level2/xtpmv.cpp | 8 ++------ test/performance/routines/level2/xtpsv.cpp | 8 ++------ test/performance/routines/level2/xtrmv.cpp | 8 ++------ test/performance/routines/level2/xtrsv.cpp | 8 ++------ test/performance/routines/level3/xgemm.cpp | 8 ++------ test/performance/routines/level3/xhemm.cpp | 8 ++------ test/performance/routines/level3/xher2k.cpp | 8 ++------ test/performance/routines/level3/xherk.cpp | 8 ++------ test/performance/routines/level3/xsymm.cpp | 8 ++------ test/performance/routines/level3/xsyr2k.cpp | 8 ++------ 
test/performance/routines/level3/xsyrk.cpp | 8 ++------ test/performance/routines/level3/xtrmm.cpp | 8 ++------ test/performance/routines/level3/xtrsm.cpp | 8 ++------ test/performance/routines/levelx/xaxpybatched.cpp | 8 ++------ test/performance/routines/levelx/xgemmbatched.cpp | 8 ++------ test/performance/routines/levelx/xomatcopy.cpp | 8 ++------ 104 files changed, 156 insertions(+), 568 deletions(-) (limited to 'test') diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 91fdf458..03da7985 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -295,9 +295,6 @@ def performance_test(routine, level_string): result = "" result += "#include \"test/performance/client.hpp\"" + NL result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL - result += "// Shortcuts to the clblast namespace" + NL - result += "using float2 = clblast::float2;" + NL - result += "using double2 = clblast::double2;" + NL + NL result += "// Main function (not within the clblast namespace)" + NL result += "int main(int argc, char *argv[]) {" + NL result += " const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);" + NL @@ -324,9 +321,6 @@ def correctness_test(routine, level_string): result = "" result += "#include \"test/correctness/testblas.hpp\"" + NL result += "#include \"test/routines/level" + level_string + "/x" + routine.lowercase_name() + ".hpp\"" + NL + NL - result += "// Shortcuts to the clblast namespace" + NL - result += "using float2 = clblast::float2;" + NL - result += "using double2 = clblast::double2;" + NL + NL result += "// Main function (not within the clblast namespace)" + NL result += "int main(int argc, char *argv[]) {" + NL result += " auto errors = size_t{0};" + NL diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py index cfdbf748..cab2411a 100644 --- 
a/scripts/generator/generator/datatype.py +++ b/scripts/generator/generator/datatype.py @@ -72,9 +72,11 @@ class DataType: def test_template(self): """Returns the template as used in the correctness/performance tests""" + buffer_type = "clblast::" + self.buffer_type if self.buffer_type in [D_FLOAT2, D_DOUBLE2] else self.buffer_type + beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_FLOAT2, D_DOUBLE2] else self.beta_cpp if self.buffer_type != self.beta_cpp: - return "<" + self.buffer_type + "," + self.beta_cpp + ">, " + self.buffer_type + ", " + self.beta_cpp - return "<" + self.buffer_type + ">, " + self.buffer_type + ", " + self.beta_cpp + return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp + return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp def is_complex(self, scalar): """Current scalar is complex""" diff --git a/test/correctness/routines/level1/xamax.cpp b/test/correctness/routines/level1/xamax.cpp index 607637e8..d940ae7a 100644 --- a/test/correctness/routines/level1/xamax.cpp +++ b/test/correctness/routines/level1/xamax.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xamax.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "iSAMAX"); errors += clblast::RunTests, double, double>(argc, argv, true, "iDAMAX"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "iCAMAX"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "iZAMAX"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "iCAMAX"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "iZAMAX"); errors += clblast::RunTests, half, half>(argc, argv, true, "iHAMAX"); if 
(errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xasum.cpp b/test/correctness/routines/level1/xasum.cpp index e22e42a6..b969d662 100644 --- a/test/correctness/routines/level1/xasum.cpp +++ b/test/correctness/routines/level1/xasum.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xasum.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SASUM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DASUM"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "ScASUM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "DzASUM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "ScASUM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "DzASUM"); errors += clblast::RunTests, half, half>(argc, argv, true, "HASUM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xaxpy.cpp b/test/correctness/routines/level1/xaxpy.cpp index 064172fa..6f4f34fb 100644 --- a/test/correctness/routines/level1/xaxpy.cpp +++ b/test/correctness/routines/level1/xaxpy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xaxpy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SAXPY"); errors += clblast::RunTests, double, double>(argc, argv, true, "DAXPY"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CAXPY"); - errors += 
clblast::RunTests, double2, double2>(argc, argv, true, "ZAXPY"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CAXPY"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPY"); errors += clblast::RunTests, half, half>(argc, argv, true, "HAXPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xcopy.cpp b/test/correctness/routines/level1/xcopy.cpp index e6f2581b..e6e94d34 100644 --- a/test/correctness/routines/level1/xcopy.cpp +++ b/test/correctness/routines/level1/xcopy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SCOPY"); errors += clblast::RunTests, double, double>(argc, argv, true, "DCOPY"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CCOPY"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZCOPY"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CCOPY"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZCOPY"); errors += clblast::RunTests, half, half>(argc, argv, true, "HCOPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xdot.cpp b/test/correctness/routines/level1/xdot.cpp index 080250cb..8dccbf26 100644 --- a/test/correctness/routines/level1/xdot.cpp +++ b/test/correctness/routines/level1/xdot.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within 
the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xdotc.cpp b/test/correctness/routines/level1/xdotc.cpp index 2a7bbeca..59eedddc 100644 --- a/test/correctness/routines/level1/xdotc.cpp +++ b/test/correctness/routines/level1/xdotc.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdotc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CDOTC"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZDOTC"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CDOTC"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTC"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xdotu.cpp b/test/correctness/routines/level1/xdotu.cpp index 1047d021..4392326d 100644 --- a/test/correctness/routines/level1/xdotu.cpp +++ b/test/correctness/routines/level1/xdotu.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xdotu.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CDOTU"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZDOTU"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CDOTU"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTU"); if (errors > 0) { return 1; } else { return 0; } } diff --git 
a/test/correctness/routines/level1/xnrm2.cpp b/test/correctness/routines/level1/xnrm2.cpp index 142fa7ba..46ca1526 100644 --- a/test/correctness/routines/level1/xnrm2.cpp +++ b/test/correctness/routines/level1/xnrm2.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xnrm2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SNRM2"); errors += clblast::RunTests, double, double>(argc, argv, true, "DNRM2"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "ScNRM2"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "DzNRM2"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "ScNRM2"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "DzNRM2"); errors += clblast::RunTests, half, half>(argc, argv, true, "HNRM2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xrot.cpp b/test/correctness/routines/level1/xrot.cpp index 5af358eb..d5eb6516 100644 --- a/test/correctness/routines/level1/xrot.cpp +++ b/test/correctness/routines/level1/xrot.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotg.cpp b/test/correctness/routines/level1/xrotg.cpp index ad23a554..ec544eab 100644 --- a/test/correctness/routines/level1/xrotg.cpp +++ b/test/correctness/routines/level1/xrotg.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" 
#include "test/routines/level1/xrotg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotm.cpp b/test/correctness/routines/level1/xrotm.cpp index 4f7e8f15..7f2d7ce6 100644 --- a/test/correctness/routines/level1/xrotm.cpp +++ b/test/correctness/routines/level1/xrotm.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrotm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xrotmg.cpp b/test/correctness/routines/level1/xrotmg.cpp index ca89bc12..4ef6e67d 100644 --- a/test/correctness/routines/level1/xrotmg.cpp +++ b/test/correctness/routines/level1/xrotmg.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xrotmg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level1/xscal.cpp b/test/correctness/routines/level1/xscal.cpp index 939524be..c9788142 100644 --- a/test/correctness/routines/level1/xscal.cpp +++ b/test/correctness/routines/level1/xscal.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xscal.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, 
"SSCAL"); errors += clblast::RunTests, double, double>(argc, argv, true, "DSCAL"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CSCAL"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZSCAL"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSCAL"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSCAL"); errors += clblast::RunTests, half, half>(argc, argv, true, "HSCAL"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level1/xswap.cpp b/test/correctness/routines/level1/xswap.cpp index 446f3d65..ee694a08 100644 --- a/test/correctness/routines/level1/xswap.cpp +++ b/test/correctness/routines/level1/xswap.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level1/xswap.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SSWAP"); errors += clblast::RunTests, double, double>(argc, argv, true, "DSWAP"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CSWAP"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZSWAP"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSWAP"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSWAP"); errors += clblast::RunTests, half, half>(argc, argv, true, "HSWAP"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgbmv.cpp b/test/correctness/routines/level2/xgbmv.cpp index 8c49bc65..6aac283b 100644 --- a/test/correctness/routines/level2/xgbmv.cpp +++ b/test/correctness/routines/level2/xgbmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" 
#include "test/routines/level2/xgbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SGBMV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGBMV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CGBMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGBMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGBMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGBMV"); errors += clblast::RunTests, half, half>(argc, argv, true, "HGBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgemv.cpp b/test/correctness/routines/level2/xgemv.cpp index 902ae777..66994b89 100644 --- a/test/correctness/routines/level2/xgemv.cpp +++ b/test/correctness/routines/level2/xgemv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CGEMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGEMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMV"); errors += clblast::RunTests, half, half>(argc, argv, true, "HGEMV"); if (errors > 0) { 
return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xger.cpp b/test/correctness/routines/level2/xger.cpp index ce61bbcb..3b5d16e9 100644 --- a/test/correctness/routines/level2/xger.cpp +++ b/test/correctness/routines/level2/xger.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xger.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xgerc.cpp b/test/correctness/routines/level2/xgerc.cpp index b747f20d..42f6bb45 100644 --- a/test/correctness/routines/level2/xgerc.cpp +++ b/test/correctness/routines/level2/xgerc.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgerc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CGERC"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGERC"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CGERC"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGERC"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xgeru.cpp b/test/correctness/routines/level2/xgeru.cpp index f80c1e2b..f167eff5 100644 --- a/test/correctness/routines/level2/xgeru.cpp +++ b/test/correctness/routines/level2/xgeru.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xgeru.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not 
within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CGERU"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGERU"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CGERU"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGERU"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhbmv.cpp b/test/correctness/routines/level2/xhbmv.cpp index a4885c01..168d9474 100644 --- a/test/correctness/routines/level2/xhbmv.cpp +++ b/test/correctness/routines/level2/xhbmv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CHBMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZHBMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHBMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhemv.cpp b/test/correctness/routines/level2/xhemv.cpp index 4318ffee..eabdf67d 100644 --- a/test/correctness/routines/level2/xhemv.cpp +++ b/test/correctness/routines/level2/xhemv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += 
clblast::RunTests, float2, float2>(argc, argv, false, "CHEMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZHEMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHEMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xher.cpp b/test/correctness/routines/level2/xher.cpp index fe37bd76..a47a45ac 100644 --- a/test/correctness/routines/level2/xher.cpp +++ b/test/correctness/routines/level2/xher.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xher.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float>(argc, argv, false, "CHER"); - errors += clblast::RunTests, double2, double>(argc, argv, true, "ZHER"); + errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHER"); + errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHER"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xher2.cpp b/test/correctness/routines/level2/xher2.cpp index 0b4af4d0..544ab16d 100644 --- a/test/correctness/routines/level2/xher2.cpp +++ b/test/correctness/routines/level2/xher2.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xher2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CHER2"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, 
"ZHER2"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHER2"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHER2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhpmv.cpp b/test/correctness/routines/level2/xhpmv.cpp index dd77df71..30d23b8f 100644 --- a/test/correctness/routines/level2/xhpmv.cpp +++ b/test/correctness/routines/level2/xhpmv.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CHPMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZHPMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHPMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHPMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhpr.cpp b/test/correctness/routines/level2/xhpr.cpp index 5a3f615f..ed876857 100644 --- a/test/correctness/routines/level2/xhpr.cpp +++ b/test/correctness/routines/level2/xhpr.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float>(argc, argv, false, "CHPR"); - errors += clblast::RunTests, double2, double>(argc, argv, true, "ZHPR"); + errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHPR"); + errors += 
clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHPR"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xhpr2.cpp b/test/correctness/routines/level2/xhpr2.cpp index 8218b444..b3bd167a 100644 --- a/test/correctness/routines/level2/xhpr2.cpp +++ b/test/correctness/routines/level2/xhpr2.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xhpr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CHPR2"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZHPR2"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHPR2"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHPR2"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xsbmv.cpp b/test/correctness/routines/level2/xsbmv.cpp index 7918cb21..3b6b3972 100644 --- a/test/correctness/routines/level2/xsbmv.cpp +++ b/test/correctness/routines/level2/xsbmv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspmv.cpp b/test/correctness/routines/level2/xspmv.cpp index 78210660..9dccdbc1 100644 --- a/test/correctness/routines/level2/xspmv.cpp +++ b/test/correctness/routines/level2/xspmv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspmv.hpp" -// Shortcuts to the clblast namespace 
-using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspr.cpp b/test/correctness/routines/level2/xspr.cpp index d05adf34..9cf242c1 100644 --- a/test/correctness/routines/level2/xspr.cpp +++ b/test/correctness/routines/level2/xspr.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xspr2.cpp b/test/correctness/routines/level2/xspr2.cpp index caa46a09..2650bd03 100644 --- a/test/correctness/routines/level2/xspr2.cpp +++ b/test/correctness/routines/level2/xspr2.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xspr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsymv.cpp b/test/correctness/routines/level2/xsymv.cpp index 978a5f8a..3f0a8f8b 100644 --- a/test/correctness/routines/level2/xsymv.cpp +++ b/test/correctness/routines/level2/xsymv.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsymv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsyr.cpp b/test/correctness/routines/level2/xsyr.cpp index 244dbfb4..15ac1f14 100644 --- 
a/test/correctness/routines/level2/xsyr.cpp +++ b/test/correctness/routines/level2/xsyr.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsyr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xsyr2.cpp b/test/correctness/routines/level2/xsyr2.cpp index 422e67ad..74806219 100644 --- a/test/correctness/routines/level2/xsyr2.cpp +++ b/test/correctness/routines/level2/xsyr2.cpp @@ -12,10 +12,6 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xsyr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; diff --git a/test/correctness/routines/level2/xtbmv.cpp b/test/correctness/routines/level2/xtbmv.cpp index 491708ec..667ae732 100644 --- a/test/correctness/routines/level2/xtbmv.cpp +++ b/test/correctness/routines/level2/xtbmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STBMV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTBMV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTBMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTBMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTBMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, 
"ZTBMV"); errors += clblast::RunTests, half, half>(argc, argv, true, "HTBMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtbsv.cpp b/test/correctness/routines/level2/xtbsv.cpp index 12b5dca5..5cfc6942 100644 --- a/test/correctness/routines/level2/xtbsv.cpp +++ b/test/correctness/routines/level2/xtbsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtbsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STBSV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTBSV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTBSV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTBSV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTBSV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTBSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtpmv.cpp b/test/correctness/routines/level2/xtpmv.cpp index b89f0adc..89056678 100644 --- a/test/correctness/routines/level2/xtpmv.cpp +++ b/test/correctness/routines/level2/xtpmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STPMV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTPMV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTPMV"); - 
errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTPMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTPMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTPMV"); errors += clblast::RunTests, half, half>(argc, argv, true, "HTPMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtpsv.cpp b/test/correctness/routines/level2/xtpsv.cpp index 6e6e7c85..28c9fe39 100644 --- a/test/correctness/routines/level2/xtpsv.cpp +++ b/test/correctness/routines/level2/xtpsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtpsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STPSV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTPSV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTPSV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTPSV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTPSV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTPSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtrmv.cpp b/test/correctness/routines/level2/xtrmv.cpp index 819f5cad..b1a414af 100644 --- a/test/correctness/routines/level2/xtrmv.cpp +++ b/test/correctness/routines/level2/xtrmv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtrmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char 
*argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STRMV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTRMV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTRMV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTRMV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRMV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMV"); errors += clblast::RunTests, half, half>(argc, argv, true, "HTRMV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level2/xtrsv.cpp b/test/correctness/routines/level2/xtrsv.cpp index 78e33807..b35d7fc7 100644 --- a/test/correctness/routines/level2/xtrsv.cpp +++ b/test/correctness/routines/level2/xtrsv.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level2/xtrsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STRSV"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTRSV"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTRSV"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTRSV"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRSV"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSV"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp index 54d41719..7fda5f2d 100644 --- a/test/correctness/routines/level3/xgemm.cpp +++ b/test/correctness/routines/level3/xgemm.cpp @@ -12,17 +12,13 @@ #include 
"test/correctness/testblas.hpp" #include "test/routines/level3/xgemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMM"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CGEMM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGEMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM"); errors += clblast::RunTests, half, half>(argc, argv, true, "HGEMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xhemm.cpp b/test/correctness/routines/level3/xhemm.cpp index 76c970a7..cbd277e2 100644 --- a/test/correctness/routines/level3/xhemm.cpp +++ b/test/correctness/routines/level3/xhemm.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xhemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float2>(argc, argv, false, "CHEMM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZHEMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, false, "CHEMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xher2k.cpp b/test/correctness/routines/level3/xher2k.cpp index c653265e..e21a429c 100644 --- 
a/test/correctness/routines/level3/xher2k.cpp +++ b/test/correctness/routines/level3/xher2k.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xher2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float>(argc, argv, false, "CHER2K"); - errors += clblast::RunTests, double2, double>(argc, argv, true, "ZHER2K"); + errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHER2K"); + errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHER2K"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xherk.cpp b/test/correctness/routines/level3/xherk.cpp index 09ea9e4d..5665147e 100644 --- a/test/correctness/routines/level3/xherk.cpp +++ b/test/correctness/routines/level3/xherk.cpp @@ -12,15 +12,11 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xherk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; - errors += clblast::RunTests, float2, float>(argc, argv, false, "CHERK"); - errors += clblast::RunTests, double2, double>(argc, argv, true, "ZHERK"); + errors += clblast::RunTests, clblast::float2, float>(argc, argv, false, "CHERK"); + errors += clblast::RunTests, clblast::double2, double>(argc, argv, true, "ZHERK"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsymm.cpp b/test/correctness/routines/level3/xsymm.cpp index 3cb3515a..3e745d24 100644 --- a/test/correctness/routines/level3/xsymm.cpp +++ b/test/correctness/routines/level3/xsymm.cpp @@ -12,17 +12,13 @@ #include 
"test/correctness/testblas.hpp" #include "test/routines/level3/xsymm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SSYMM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DSYMM"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CSYMM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZSYMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYMM"); errors += clblast::RunTests, half, half>(argc, argv, true, "HSYMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsyr2k.cpp b/test/correctness/routines/level3/xsyr2k.cpp index 617af04d..b3027063 100644 --- a/test/correctness/routines/level3/xsyr2k.cpp +++ b/test/correctness/routines/level3/xsyr2k.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xsyr2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SSYR2K"); errors += clblast::RunTests, double, double>(argc, argv, true, "DSYR2K"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CSYR2K"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZSYR2K"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYR2K"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYR2K"); errors += clblast::RunTests, half, half>(argc, 
argv, true, "HSYR2K"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xsyrk.cpp b/test/correctness/routines/level3/xsyrk.cpp index 2014b8d0..26c0db41 100644 --- a/test/correctness/routines/level3/xsyrk.cpp +++ b/test/correctness/routines/level3/xsyrk.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xsyrk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SSYRK"); errors += clblast::RunTests, double, double>(argc, argv, true, "DSYRK"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CSYRK"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZSYRK"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CSYRK"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZSYRK"); errors += clblast::RunTests, half, half>(argc, argv, true, "HSYRK"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xtrmm.cpp b/test/correctness/routines/level3/xtrmm.cpp index 32640d52..63d17ed5 100644 --- a/test/correctness/routines/level3/xtrmm.cpp +++ b/test/correctness/routines/level3/xtrmm.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xtrmm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STRMM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTRMM"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, 
"CTRMM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTRMM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRMM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMM"); errors += clblast::RunTests, half, half>(argc, argv, true, "HTRMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/level3/xtrsm.cpp b/test/correctness/routines/level3/xtrsm.cpp index bc45a8bf..dcc20060 100644 --- a/test/correctness/routines/level3/xtrsm.cpp +++ b/test/correctness/routines/level3/xtrsm.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/level3/xtrsm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "STRSM"); errors += clblast::RunTests, double, double>(argc, argv, true, "DTRSM"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CTRSM"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZTRSM"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CTRSM"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/levelx/xaxpybatched.cpp b/test/correctness/routines/levelx/xaxpybatched.cpp index a106440f..3b906217 100644 --- a/test/correctness/routines/levelx/xaxpybatched.cpp +++ b/test/correctness/routines/levelx/xaxpybatched.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xaxpybatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SAXPYBATCHED"); errors += clblast::RunTests, double, double>(argc, argv, true, "DAXPYBATCHED"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CAXPYBATCHED"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZAXPYBATCHED"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CAXPYBATCHED"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPYBATCHED"); errors += clblast::RunTests, half, half>(argc, argv, true, "HAXPYBATCHED"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/correctness/routines/levelx/xgemmbatched.cpp b/test/correctness/routines/levelx/xgemmbatched.cpp index 748e1bb7..1e931fd5 100644 --- a/test/correctness/routines/levelx/xgemmbatched.cpp +++ b/test/correctness/routines/levelx/xgemmbatched.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xgemmbatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SGEMMBATCHED"); errors += clblast::RunTests, double, double>(argc, argv, true, "DGEMMBATCHED"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "CGEMMBATCHED"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZGEMMBATCHED"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMBATCHED"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMBATCHED"); errors += clblast::RunTests, half, half>(argc, argv, true, "HGEMMBATCHED"); if (errors > 0) { return 1; } else { return 0; } } diff --git 
a/test/correctness/routines/levelx/xomatcopy.cpp b/test/correctness/routines/levelx/xomatcopy.cpp index e034bc18..f512432b 100644 --- a/test/correctness/routines/levelx/xomatcopy.cpp +++ b/test/correctness/routines/levelx/xomatcopy.cpp @@ -12,17 +12,13 @@ #include "test/correctness/testblas.hpp" #include "test/routines/levelx/xomatcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunTests, float, float>(argc, argv, false, "SOMATCOPY"); errors += clblast::RunTests, double, double>(argc, argv, true, "DOMATCOPY"); - errors += clblast::RunTests, float2, float2>(argc, argv, true, "COMATCOPY"); - errors += clblast::RunTests, double2, double2>(argc, argv, true, "ZOMATCOPY"); + errors += clblast::RunTests, clblast::float2, clblast::float2>(argc, argv, true, "COMATCOPY"); + errors += clblast::RunTests, clblast::double2, clblast::double2>(argc, argv, true, "ZOMATCOPY"); errors += clblast::RunTests, half, half>(argc, argv, true, "HOMATCOPY"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/performance/routines/level1/xamax.cpp b/test/performance/routines/level1/xamax.cpp index 5dc7b3d9..5cbef604 100644 --- a/test/performance/routines/level1/xamax.cpp +++ b/test/performance/routines/level1/xamax.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xamax.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - 
clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xasum.cpp b/test/performance/routines/level1/xasum.cpp index bf5b2fa9..7fccb678 100644 --- a/test/performance/routines/level1/xasum.cpp +++ b/test/performance/routines/level1/xasum.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xasum.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xaxpy.cpp b/test/performance/routines/level1/xaxpy.cpp index faccc089..739408bb 100644 --- a/test/performance/routines/level1/xaxpy.cpp +++ b/test/performance/routines/level1/xaxpy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xaxpy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto 
command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xcopy.cpp b/test/performance/routines/level1/xcopy.cpp index 8aa536af..902c394f 100644 --- a/test/performance/routines/level1/xcopy.cpp +++ b/test/performance/routines/level1/xcopy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xdot.cpp b/test/performance/routines/level1/xdot.cpp index 9a570e1e..b2d4d969 100644 --- a/test/performance/routines/level1/xdot.cpp +++ b/test/performance/routines/level1/xdot.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include 
"test/routines/level1/xdot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xdotc.cpp b/test/performance/routines/level1/xdotc.cpp index 426b81ae..308bcdab 100644 --- a/test/performance/routines/level1/xdotc.cpp +++ b/test/performance/routines/level1/xdotc.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xdotc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xdotu.cpp b/test/performance/routines/level1/xdotu.cpp index 4fbe167d..fc54a8a5 100644 --- a/test/performance/routines/level1/xdotu.cpp +++ b/test/performance/routines/level1/xdotu.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xdotu.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the 
clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xnrm2.cpp b/test/performance/routines/level1/xnrm2.cpp index 6a1cdcc7..769335eb 100644 --- a/test/performance/routines/level1/xnrm2.cpp +++ b/test/performance/routines/level1/xnrm2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xnrm2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xrot.cpp b/test/performance/routines/level1/xrot.cpp index 2b94ca39..f010e04a 100644 --- 
a/test/performance/routines/level1/xrot.cpp +++ b/test/performance/routines/level1/xrot.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrot.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotg.cpp b/test/performance/routines/level1/xrotg.cpp index ee6fc44b..4c8d33cf 100644 --- a/test/performance/routines/level1/xrotg.cpp +++ b/test/performance/routines/level1/xrotg.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrotg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotm.cpp b/test/performance/routines/level1/xrotm.cpp index e8d73311..bc2111b3 100644 --- a/test/performance/routines/level1/xrotm.cpp +++ b/test/performance/routines/level1/xrotm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xrotm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xrotmg.cpp b/test/performance/routines/level1/xrotmg.cpp index a5266b14..fb568243 100644 --- a/test/performance/routines/level1/xrotmg.cpp +++ b/test/performance/routines/level1/xrotmg.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include 
"test/routines/level1/xrotmg.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level1/xscal.cpp b/test/performance/routines/level1/xscal.cpp index 6fefc5d0..b9db60cf 100644 --- a/test/performance/routines/level1/xscal.cpp +++ b/test/performance/routines/level1/xscal.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xscal.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level1/xswap.cpp b/test/performance/routines/level1/xswap.cpp index b728b8f4..db40f6e4 100644 --- a/test/performance/routines/level1/xswap.cpp +++ b/test/performance/routines/level1/xswap.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level1/xswap.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = 
clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgbmv.cpp b/test/performance/routines/level2/xgbmv.cpp index 6a4b01f8..23a91503 100644 --- a/test/performance/routines/level2/xgbmv.cpp +++ b/test/performance/routines/level2/xgbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgemv.cpp b/test/performance/routines/level2/xgemv.cpp index 335d5ef1..3bb14b68 100644 --- a/test/performance/routines/level2/xgemv.cpp +++ b/test/performance/routines/level2/xgemv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include 
"test/routines/level2/xgemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xger.cpp b/test/performance/routines/level2/xger.cpp index 50fdb9e6..ca23b8f0 100644 --- a/test/performance/routines/level2/xger.cpp +++ b/test/performance/routines/level2/xger.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xger.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xgerc.cpp b/test/performance/routines/level2/xgerc.cpp index 67c72285..0423cdd5 100644 --- a/test/performance/routines/level2/xgerc.cpp +++ b/test/performance/routines/level2/xgerc.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgerc.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = 
clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xgeru.cpp b/test/performance/routines/level2/xgeru.cpp index 6e845bb8..c0fbb2d5 100644 --- a/test/performance/routines/level2/xgeru.cpp +++ b/test/performance/routines/level2/xgeru.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xgeru.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhbmv.cpp b/test/performance/routines/level2/xhbmv.cpp index 600317c1..d59cba26 100644 --- 
a/test/performance/routines/level2/xhbmv.cpp +++ b/test/performance/routines/level2/xhbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhemv.cpp b/test/performance/routines/level2/xhemv.cpp index 7700cf7b..1664b6cd 100644 --- a/test/performance/routines/level2/xhemv.cpp +++ b/test/performance/routines/level2/xhemv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhemv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, 
argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xher.cpp b/test/performance/routines/level2/xher.cpp index e7276aee..434f486c 100644 --- a/test/performance/routines/level2/xher.cpp +++ b/test/performance/routines/level2/xher.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xher.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float>(argc, argv); break; + clblast::RunClient, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double>(argc, argv); break; + clblast::RunClient, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xher2.cpp b/test/performance/routines/level2/xher2.cpp index b4c53206..cce40a9e 100644 --- a/test/performance/routines/level2/xher2.cpp +++ b/test/performance/routines/level2/xher2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xher2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const 
auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpmv.cpp b/test/performance/routines/level2/xhpmv.cpp index d9683d2e..d88791fe 100644 --- a/test/performance/routines/level2/xhpmv.cpp +++ b/test/performance/routines/level2/xhpmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpr.cpp b/test/performance/routines/level2/xhpr.cpp index 
c4ffaf81..a92a3134 100644 --- a/test/performance/routines/level2/xhpr.cpp +++ b/test/performance/routines/level2/xhpr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float>(argc, argv); break; + clblast::RunClient, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double>(argc, argv); break; + clblast::RunClient, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xhpr2.cpp b/test/performance/routines/level2/xhpr2.cpp index 3e5d4004..f34de29b 100644 --- a/test/performance/routines/level2/xhpr2.cpp +++ b/test/performance/routines/level2/xhpr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xhpr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, 
float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xsbmv.cpp b/test/performance/routines/level2/xsbmv.cpp index 9c0ab3b6..59bbf40c 100644 --- a/test/performance/routines/level2/xsbmv.cpp +++ b/test/performance/routines/level2/xsbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspmv.cpp b/test/performance/routines/level2/xspmv.cpp index 6cc4e3ba..9ba29f43 100644 --- a/test/performance/routines/level2/xspmv.cpp +++ b/test/performance/routines/level2/xspmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspr.cpp b/test/performance/routines/level2/xspr.cpp index dc45ba6d..57551f5d 100644 --- a/test/performance/routines/level2/xspr.cpp +++ b/test/performance/routines/level2/xspr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast 
namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xspr2.cpp b/test/performance/routines/level2/xspr2.cpp index 3c9a769f..573fb652 100644 --- a/test/performance/routines/level2/xspr2.cpp +++ b/test/performance/routines/level2/xspr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xspr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xsymv.cpp b/test/performance/routines/level2/xsymv.cpp index aaa98c8b..25933d8d 100644 --- a/test/performance/routines/level2/xsymv.cpp +++ b/test/performance/routines/level2/xsymv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsymv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xsyr.cpp b/test/performance/routines/level2/xsyr.cpp index d710bf63..3b54510d 100644 --- a/test/performance/routines/level2/xsyr.cpp +++ b/test/performance/routines/level2/xsyr.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsyr.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git 
a/test/performance/routines/level2/xsyr2.cpp b/test/performance/routines/level2/xsyr2.cpp index 39b46b6a..ab9641c2 100644 --- a/test/performance/routines/level2/xsyr2.cpp +++ b/test/performance/routines/level2/xsyr2.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xsyr2.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); diff --git a/test/performance/routines/level2/xtbmv.cpp b/test/performance/routines/level2/xtbmv.cpp index 5fb3ea14..319f9c80 100644 --- a/test/performance/routines/level2/xtbmv.cpp +++ b/test/performance/routines/level2/xtbmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtbmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtbsv.cpp b/test/performance/routines/level2/xtbsv.cpp index 7b88917c..4d37e76d 100644 --- a/test/performance/routines/level2/xtbsv.cpp +++ b/test/performance/routines/level2/xtbsv.cpp @@ -12,10 +12,6 @@ #include 
"test/performance/client.hpp" #include "test/routines/level2/xtbsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtpmv.cpp b/test/performance/routines/level2/xtpmv.cpp index 907749a7..c2db51b1 100644 --- a/test/performance/routines/level2/xtpmv.cpp +++ b/test/performance/routines/level2/xtpmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtpmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git 
a/test/performance/routines/level2/xtpsv.cpp b/test/performance/routines/level2/xtpsv.cpp index 0dab8ff6..b01a9f05 100644 --- a/test/performance/routines/level2/xtpsv.cpp +++ b/test/performance/routines/level2/xtpsv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtpsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtrmv.cpp b/test/performance/routines/level2/xtrmv.cpp index c2c6f232..610a5052 100644 --- a/test/performance/routines/level2/xtrmv.cpp +++ b/test/performance/routines/level2/xtrmv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtrmv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, 
clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level2/xtrsv.cpp b/test/performance/routines/level2/xtrsv.cpp index 49e477f7..02255e71 100644 --- a/test/performance/routines/level2/xtrsv.cpp +++ b/test/performance/routines/level2/xtrsv.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level2/xtrsv.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xgemm.cpp b/test/performance/routines/level3/xgemm.cpp index deb2493f..602e1a20 100644 --- a/test/performance/routines/level3/xgemm.cpp +++ b/test/performance/routines/level3/xgemm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xgemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, 
char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xhemm.cpp b/test/performance/routines/level3/xhemm.cpp index 975c672f..6c3687a9 100644 --- a/test/performance/routines/level3/xhemm.cpp +++ b/test/performance/routines/level3/xhemm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xhemm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xher2k.cpp b/test/performance/routines/level3/xher2k.cpp index d579d4f9..9d3385f7 100644 --- a/test/performance/routines/level3/xher2k.cpp +++ b/test/performance/routines/level3/xher2k.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include 
"test/routines/level3/xher2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float>(argc, argv); break; + clblast::RunClient, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double>(argc, argv); break; + clblast::RunClient, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xherk.cpp b/test/performance/routines/level3/xherk.cpp index 94411e5a..ae6e774e 100644 --- a/test/performance/routines/level3/xherk.cpp +++ b/test/performance/routines/level3/xherk.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xherk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -24,9 +20,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode"); case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float>(argc, argv); break; + clblast::RunClient, clblast::float2, float>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double>(argc, argv); break; + 
clblast::RunClient, clblast::double2, double>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsymm.cpp b/test/performance/routines/level3/xsymm.cpp index 04ae8eb0..ba3b6ab2 100644 --- a/test/performance/routines/level3/xsymm.cpp +++ b/test/performance/routines/level3/xsymm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsymm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsyr2k.cpp b/test/performance/routines/level3/xsyr2k.cpp index 7b8b6f4f..150a4192 100644 --- a/test/performance/routines/level3/xsyr2k.cpp +++ b/test/performance/routines/level3/xsyr2k.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsyr2k.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - 
clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xsyrk.cpp b/test/performance/routines/level3/xsyrk.cpp index ea0fc33b..00cef52b 100644 --- a/test/performance/routines/level3/xsyrk.cpp +++ b/test/performance/routines/level3/xsyrk.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xsyrk.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xtrmm.cpp b/test/performance/routines/level3/xtrmm.cpp index 7a29e111..fb54a410 100644 --- a/test/performance/routines/level3/xtrmm.cpp +++ b/test/performance/routines/level3/xtrmm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xtrmm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto 
command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/level3/xtrsm.cpp b/test/performance/routines/level3/xtrsm.cpp index 342274b7..f44265f2 100644 --- a/test/performance/routines/level3/xtrsm.cpp +++ b/test/performance/routines/level3/xtrsm.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/level3/xtrsm.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -26,9 +22,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/levelx/xaxpybatched.cpp b/test/performance/routines/levelx/xaxpybatched.cpp index 6d3bcb51..7c09cd5b 100644 --- a/test/performance/routines/levelx/xaxpybatched.cpp +++ b/test/performance/routines/levelx/xaxpybatched.cpp @@ -12,10 +12,6 @@ #include 
"test/performance/client.hpp" #include "test/routines/levelx/xaxpybatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } diff --git a/test/performance/routines/levelx/xgemmbatched.cpp b/test/performance/routines/levelx/xgemmbatched.cpp index c9477fad..f4c860d8 100644 --- a/test/performance/routines/levelx/xgemmbatched.cpp +++ b/test/performance/routines/levelx/xgemmbatched.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/levelx/xgemmbatched.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, 
argv); break; } return 0; } diff --git a/test/performance/routines/levelx/xomatcopy.cpp b/test/performance/routines/levelx/xomatcopy.cpp index 5821c3b8..568f22e6 100644 --- a/test/performance/routines/levelx/xomatcopy.cpp +++ b/test/performance/routines/levelx/xomatcopy.cpp @@ -12,10 +12,6 @@ #include "test/performance/client.hpp" #include "test/routines/levelx/xomatcopy.hpp" -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv); @@ -27,9 +23,9 @@ int main(int argc, char *argv[]) { case clblast::Precision::kDouble: clblast::RunClient, double, double>(argc, argv); break; case clblast::Precision::kComplexSingle: - clblast::RunClient, float2, float2>(argc, argv); break; + clblast::RunClient, clblast::float2, clblast::float2>(argc, argv); break; case clblast::Precision::kComplexDouble: - clblast::RunClient, double2, double2>(argc, argv); break; + clblast::RunClient, clblast::double2, clblast::double2>(argc, argv); break; } return 0; } -- cgit v1.2.3 From af9a521042ffc2823f60e12018db9e0a29120628 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 3 Apr 2017 21:46:07 +0200 Subject: Fixes the CUDA wrapper (now actually tested on a system with CUDA) --- test/routines/common.hpp | 3 +++ test/wrapper_cuda.hpp | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'test') diff --git a/test/routines/common.hpp b/test/routines/common.hpp index 0d516a0e..1abf5528 100644 --- a/test/routines/common.hpp +++ b/test/routines/common.hpp @@ -25,6 +25,9 @@ #ifdef CLBLAST_REF_CBLAS #include "test/wrapper_cblas.hpp" #endif +#ifdef CLBLAST_REF_CUBLAS + #include "test/wrapper_cuda.hpp" +#endif // ================================================================================================= diff --git a/test/wrapper_cuda.hpp 
b/test/wrapper_cuda.hpp index 0f24d0d9..509de9d1 100644 --- a/test/wrapper_cuda.hpp +++ b/test/wrapper_cuda.hpp @@ -22,7 +22,7 @@ #include "utilities/utilities.hpp" #ifdef CLBLAST_REF_CUBLAS - #include + #include #include #endif @@ -32,33 +32,33 @@ namespace clblast { // Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards #ifdef CLBLAST_REF_CUBLAS template - void CUDAToHost(const T* buffer_cuda, const std::vector &buffer_host, const size_t size) { + void CUDAToHost(T* buffer_cuda, std::vector &buffer_host, const size_t size) { cudaMemcpy( - std::reinterpret_cast(buffer_host.data()), - std::reinterpret_cast(buffer_cuda), + reinterpret_cast(buffer_host.data()), + reinterpret_cast(buffer_cuda), size*sizeof(T), cudaMemcpyDeviceToHost ); cudaFree(buffer_cuda); } #else - template void CUDAToHost(const T*, const std::vector&, const size_t) { } + template void CUDAToHost(T*, const std::vector&, const size_t) { } #endif // Allocates space on the CUDA device and copies in data from the host #ifdef CLBLAST_REF_CUBLAS template - void HostToCUDA(const T* buffer_cuda, const std::vector &buffer_host, const size_t size) { - cudaMalloc(std::reinterpret_cast&buffer_cuda, size*sizeof(T)); + void HostToCUDA(T* buffer_cuda, std::vector &buffer_host, const size_t size) { + cudaMalloc(reinterpret_cast(&buffer_cuda), size*sizeof(T)); cudaMemcpy( - std::reinterpret_cast(buffer_cuda), - std::reinterpret_cast(buffer_host.data()), + reinterpret_cast(buffer_cuda), + reinterpret_cast(buffer_host.data()), size*sizeof(T), cudaMemcpyHostToDevice ); } #else - template void HostToCUDA(const T*, const std::vector&, const size_t) { } + template void HostToCUDA(T*, const std::vector&, const size_t) { } #endif // ================================================================================================= -- cgit v1.2.3 From 674ff96fdf79b171ba4d100fefff437d7943ddc9 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 5 Apr 2017 21:27:25 +0200 Subject: Added a 
first version of a cuBLAS wrapper (WIP) --- scripts/generator/generator.py | 11 +- scripts/generator/generator/convert.py | 13 + scripts/generator/generator/cpp.py | 46 + scripts/generator/generator/routine.py | 85 +- test/wrapper_cublas.hpp | 2418 ++++++++++++++++++++++++++++++++ 5 files changed, 2567 insertions(+), 6 deletions(-) create mode 100644 test/wrapper_cublas.hpp (limited to 'test') diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 086b27d3..3f3fab62 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -38,11 +38,12 @@ FILES = [ "/src/clblast_c.cpp", "/test/wrapper_clblas.hpp", "/test/wrapper_cblas.hpp", + "/test/wrapper_cublas.hpp", "/include/clblast_netlib_c.h", "/src/clblast_netlib_c.cpp", ] -HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32] -FOOTER_LINES = [25, 138, 27, 38, 6, 6, 9, 2] +HEADER_LINES = [123, 76, 126, 23, 29, 41, 29, 65, 32] +FOOTER_LINES = [25, 138, 27, 38, 6, 6, 6, 9, 2] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 @@ -194,7 +195,7 @@ def main(argv): # Re-writes the body of the file with open(library_root + FILES[i], "w") as f: body = "" - levels = [1, 2, 3] if (i == 4 or i == 5) else [1, 2, 3, 4] + levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4] for level in levels: body += cpp.LEVEL_SEPARATORS[level - 1] + "\n" for routine in ROUTINES[level - 1]: @@ -211,9 +212,11 @@ def main(argv): if i == 5: body += cpp.wrapper_cblas(routine) if i == 6: + body += cpp.wrapper_cublas(routine) + if i == 7: if not routine.batched: body += cpp.clblast_netlib_c_h(routine) - if i == 7: + if i == 8: if not routine.batched: body += cpp.clblast_netlib_c_cc(routine) f.write("".join(file_header)) diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index c0309ec3..80b6f338 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -56,6 +56,19 @@ def option_to_cblas(x): }[x] +def 
option_to_cublas(x): + """As above, but for clBLAS data-types""" + return { + 'layout': "cublas_has_no_layout", + 'a_transpose': "cublasOperation_t", + 'b_transpose': "cublasOperation_t", + 'ab_transpose': "cublasOperation_t", + 'side': "cublasSideMode_t", + 'triangle': "cublasFillMode_t", + 'diagonal': "cublasDiagType_t", + }[x] + + def option_to_documentation(x): """Translates an option name to a documentation string""" return { diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 03da7985..49240095 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -290,6 +290,52 @@ def wrapper_cblas(routine): return result +def wrapper_cublas(routine): + """The wrapper to the reference cuBLAS routines (for performance/correctness testing)""" + result = "" + if routine.has_tests: + result += NL + "// Forwards the cuBLAS calls for %s" % routine.short_names_tested() + NL + if routine.no_scalars(): + result += routine.routine_header_wrapper_cublas(routine.template, True, 23) + ";" + NL + for flavour in routine.flavours: + result += routine.routine_header_wrapper_cublas(flavour, False, 23) + " {" + NL + + # There is a version available in cuBLAS + if flavour.precision_name in ["S", "D", "C", "Z"]: + indent = " " * (24 + routine.length()) + arguments = routine.arguments_wrapper_cublas(flavour) + result += " cublasHandle_t handle;" + NL + result += " auto status = cublas" + flavour.name + routine.name + "(handle, " + result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL + result += " cublasDestroy(handle);" + NL + result += " return status;" + + # There is no cuBLAS available, forward the call to one of the available functions + else: # Half-precision + result += " return CUBLAS_STATUS_NOT_SUPPORTED;" + # indent = " " * (24 + routine.length()) + + # # Convert to float (note: also integer buffers are stored as half/float) + # for buf in routine.inputs + routine.outputs: + # result += " auto 
" + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL + + # # Call the float routine + # result += " cublasHandle_t handle;" + NL + # result += " auto status = cublasX" + routine.name + "(handle," + # result += ("," + NL + indent).join([a for a in routine.arguments_half()]) + ");" + NL + # result += " cublasDestroy(handle);" + NL + # result += " return status;" + NL + + # # Convert back to half + # for buf in routine.outputs: + # result += " FloatToHalfBuffer(" + buf + "_buffer, " + buf + "_buffer_bis, queues[0]);" + NL + # result += " return status;" + + # Complete + result += NL + "}" + NL + return result + + def performance_test(routine, level_string): """Generates the body of a performance test for a specific routine""" result = "" diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 59b2ed73..9414eb50 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -257,7 +257,7 @@ class Routine: return [] def buffer_def_wrapper_cl(self, name, flavour): - """As above but with data-types""" + """As above but for OpenCL""" prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"] @@ -266,6 +266,16 @@ class Routine: return [", ".join(a + b + c)] return [] + def buffer_def_wrapper_cuda(self, name, flavour): + """As above but for CUDA""" + prefix = "const " if name in self.inputs else "" + if name in self.inputs or name in self.outputs: + a = [prefix + flavour.buffer_type + "* " + name + "_buffer"] + b = ["const size_t " + name + "_offset"] + c = ["const size_t " + name + "_" + self.postfix(name)] if name not in self.buffers_without_ld_inc() else [] + return [", ".join(a + b + c)] + return [] + def buffer_def_vector(self, name, flavour): """As above but as vectors""" prefix = "const " if name in self.inputs else "" @@ -329,6 +339,18 @@ class 
Routine: return [", ".join(a + c)] return [] + def buffer_wrapper_cublas(self, name): + """As above but for cuBLAS the wrapper""" + if name in self.inputs or name in self.outputs: + a = ["&" + name + "_buffer[" + name + "_offset]"] + c = [] + if name in ["x", "y"]: + c = ["static_cast(" + name + "_" + self.postfix(name) + ")"] + elif name in ["a", "b", "c"]: + c = [name + "_" + self.postfix(name)] + return [", ".join(a + c)] + return [] + def buffer_type(self, name): """As above, but only data-types""" prefix = "const " if (name in self.inputs) else "" @@ -399,6 +421,16 @@ class Routine: return [name] return [] + def scalar_use_wrapper_by_ref(self, name, flavour): + """As above, but for the cuBLAS wrapper""" + if name in self.scalars: + if name == "alpha": + return ["&" + flavour.use_alpha_opencl()] + elif name == "beta": + return ["&" + flavour.use_beta_opencl()] + return [name] + return [] + def scalar_use_wrapper_cblas(self, name, flavour): """As above, but for the CBLAS wrapper""" if name in self.scalars: @@ -465,6 +497,12 @@ class Routine: return [", ".join([s for s in self.sizes])] return [] + def sizes_list_as_int(self): + """Retrieves a list of comma-separated sizes (m, n, k) cast to integers""" + if self.sizes: + return [", ".join(["static_cast(" + s + ")" for s in self.sizes])] + return [] + def sizes_def(self): """Retrieves the definition of the sizes (m,n,k)""" if self.sizes: @@ -531,6 +569,13 @@ class Routine: return [", ".join(definitions)] return [] + def options_def_wrapper_cublas(self): + """As above, but now using cuBLAS data-types""" + if self.options: + definitions = ["const " + convert.option_to_cublas(o) + " " + o for o in self.options] + return [", ".join(definitions)] + return [] + def options_type(self): """Retrieves the types of the options (layout, transpose, side, etc.)""" if self.options: @@ -615,7 +660,7 @@ class Routine: def arguments_wrapper_cblas(self, flavour): """As above, but for the CBLAS wrapper""" - return (self.options_list() 
+ self.sizes_list() + + return (self.options_list() + self.sizes_list_as_int() + self.scalar_use_wrapper_cblas("alpha", flavour) + list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.buffers_first()])) + self.scalar_use_wrapper_cblas("beta", flavour) + @@ -623,6 +668,17 @@ class Routine: list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.scalar_buffers_second()])) + list(chain(*[self.scalar_use_wrapper_cblas(s, flavour) for s in self.other_scalars()]))) + def arguments_wrapper_cublas(self, flavour): + """As above, but for the cuBLAS wrapper""" + return (self.options_list() + self.sizes_list_as_int() + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_first()])) + + self.scalar_use_wrapper_by_ref("alpha", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_first()])) + + self.scalar_use_wrapper_by_ref("beta", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_second()])) + + list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_use_wrapper_by_ref(s, flavour) for s in self.other_scalars()]))) + def arguments_def(self, flavour): """Retrieves a combination of all the argument definitions""" return (self.options_def() + self.sizes_def() + @@ -683,6 +739,17 @@ class Routine: list(chain(*[self.buffer_def_vector(b, flavour) for b in self.scalar_buffers_second()])) + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_def_wrapper_cublas(self, flavour): + """As above, but cuBLAS wrapper plain data-types""" + return (self.options_def_wrapper_cublas() + self.sizes_def() + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_first()])) + + self.scalar_def_plain("alpha", flavour) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_first()])) + + self.scalar_def_plain("beta", flavour) + + 
list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.buffers_second()])) + + list(chain(*[self.buffer_def_wrapper_cuda(b, flavour) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()]))) + def arguments_type(self, flavour): """Retrieves a combination of all the argument types""" return (self.options_type() + self.sizes_type() + @@ -781,3 +848,17 @@ class Routine: result = "void cblasX" + self.name + "(" result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cblas(flavour)]) + ")" return result + + def routine_header_wrapper_cublas(self, flavour, def_only, spaces): + """As above, but now for the cuBLAS wrapper""" + template = "<" + flavour.template + ">" if self.no_scalars() and not def_only else "" + indent = " " * (spaces + self.length() + len(template)) + result = "" + if self.no_scalars(): + result += "template <" + if def_only: + result += flavour.name + result += ">\n" + result += "cublasStatus_t cublasX" + self.name + template + "(" + result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cublas(flavour)]) + ")" + return result diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp new file mode 100644 index 00000000..a0e274f0 --- /dev/null +++ b/test/wrapper_cublas.hpp @@ -0,0 +1,2418 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file implements a wrapper around the cuBLAS library, such that its routines can be called +// in a similar way as the CLBlast routines: using alpha and beta to determine the precision. 
+// +// ================================================================================================= + +#ifndef CLBLAST_TEST_WRAPPER_CUBLAS_H_ +#define CLBLAST_TEST_WRAPPER_CUBLAS_H_ + +#include +#include + +#include "utilities/utilities.hpp" + +namespace clblast { + +// Conversions from CLBlast types +cublasOperation_t convertToCUBLAS(const Transpose v) { return (v == Transpose::kNo) ? CUBLAS_OP_N : (v == Transpose::kYes) ? CUBLAS_OP_T : CUBLAS_OP_C; } +cublasFillMode_t convertToCUBLAS(const Triangle v) { return (v == Triangle::kUpper) ? CUBLAS_FILL_MODE_UPPER : CUBLAS_FILL_MODE_LOWER; } +cublasDiagType_t convertToCUBLAS(const Diagonal v) { return (v == Diagonal::kUnit) ? CUBLAS_DIAG_UNIT : CUBLAS_DIAG_NON_UNIT; } +cublasSideMode_t convertToCUBLAS(const Side v) { return (v == Side::kLeft) ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; } + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines +// ================================================================================================= + +// Forwards the cuBLAS calls for SROTG/DROTG +template +cublasStatus_t cublasXrotg(T* sa_buffer, const size_t sa_offset, + T* sb_buffer, const size_t sb_offset, + T* sc_buffer, const size_t sc_offset, + T* ss_buffer, const size_t ss_offset); +template <> +cublasStatus_t cublasXrotg(float* sa_buffer, const size_t sa_offset, + float* sb_buffer, const size_t sb_offset, + float* sc_buffer, const size_t sc_offset, + float* ss_buffer, const size_t ss_offset) { + cublasHandle_t handle; + auto status = cublasSrotg(handle, &sa_buffer[sa_offset], + &sb_buffer[sb_offset], + &sc_buffer[sc_offset], + &ss_buffer[ss_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotg(double* sa_buffer, const size_t sa_offset, + double* sb_buffer, const size_t sb_offset, + double* sc_buffer, const size_t sc_offset, + double* ss_buffer, const size_t ss_offset) { + 
cublasHandle_t handle; + auto status = cublasDrotg(handle, &sa_buffer[sa_offset], + &sb_buffer[sb_offset], + &sc_buffer[sc_offset], + &ss_buffer[ss_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROTMG/DROTMG +template +cublasStatus_t cublasXrotmg(T* sd1_buffer, const size_t sd1_offset, + T* sd2_buffer, const size_t sd2_offset, + T* sx1_buffer, const size_t sx1_offset, + const T* sy1_buffer, const size_t sy1_offset, + T* sparam_buffer, const size_t sparam_offset); +template <> +cublasStatus_t cublasXrotmg(float* sd1_buffer, const size_t sd1_offset, + float* sd2_buffer, const size_t sd2_offset, + float* sx1_buffer, const size_t sx1_offset, + const float* sy1_buffer, const size_t sy1_offset, + float* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasSrotmg(handle, &sd1_buffer[sd1_offset], + &sd2_buffer[sd2_offset], + &sx1_buffer[sx1_offset], + &sy1_buffer[sy1_offset], + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotmg(double* sd1_buffer, const size_t sd1_offset, + double* sd2_buffer, const size_t sd2_offset, + double* sx1_buffer, const size_t sx1_offset, + const double* sy1_buffer, const size_t sy1_offset, + double* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasDrotmg(handle, &sd1_buffer[sd1_offset], + &sd2_buffer[sd2_offset], + &sx1_buffer[sx1_offset], + &sy1_buffer[sy1_offset], + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROT/DROT +cublasStatus_t cublasXrot(const size_t n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc, + const float cos, + const float sin) { + cublasHandle_t handle; + auto status = cublasSrot(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + 
cos, + sin); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXrot(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc, + const double cos, + const double sin) { + cublasHandle_t handle; + auto status = cublasDrot(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + cos, + sin); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SROTM/DROTM +template +cublasStatus_t cublasXrotm(const size_t n, + T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc, + T* sparam_buffer, const size_t sparam_offset); +template <> +cublasStatus_t cublasXrotm(const size_t n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc, + float* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasSrotm(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXrotm(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc, + double* sparam_buffer, const size_t sparam_offset) { + cublasHandle_t handle; + auto status = cublasDrotm(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &sparam_buffer[sparam_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP +template +cublasStatus_t cublasXswap(const size_t n, + T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXswap(const size_t 
n, + float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSswap(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap(const size_t n, + double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDswap(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap(const size_t n, + float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCswap(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap(const size_t n, + double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZswap(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXswap(const size_t n, + half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL +cublasStatus_t cublasXscal(const size_t n, + const float alpha, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSscal(handle, 
static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const double alpha, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDscal(handle, static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const float2 alpha, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCscal(handle, static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const double2 alpha, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZscal(handle, static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXscal(const size_t n, + const half alpha, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY +template +cublasStatus_t cublasXcopy(const size_t n, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXcopy(const size_t n, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasScopy(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy(const size_t n, + const double* x_buffer, const 
size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDcopy(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy(const size_t n, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCcopy(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy(const size_t n, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZcopy(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXcopy(const size_t n, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY +cublasStatus_t cublasXaxpy(const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSaxpy(handle, static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* y_buffer, const size_t y_offset, 
const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDaxpy(handle, static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCaxpy(handle, static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZaxpy(handle, static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXaxpy(const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SDOT/DDOT +template +cublasStatus_t cublasXdot(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdot(const size_t n, + float* dot_buffer, const size_t dot_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSdot(handle, static_cast(n), + &dot_buffer[dot_offset], 
+ &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdot(const size_t n, + double* dot_buffer, const size_t dot_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDdot(handle, static_cast(n), + &dot_buffer[dot_offset], + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdot(const size_t n, + half* dot_buffer, const size_t dot_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CDOTU/ZDOTU +template +cublasStatus_t cublasXdotu(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdotu(const size_t n, + float2* dot_buffer, const size_t dot_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCdotu(handle, static_cast(n), + &dot_buffer[dot_offset], + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdotu(const size_t n, + double2* dot_buffer, const size_t dot_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZdotu(handle, static_cast(n), + &dot_buffer[dot_offset], + 
&x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CDOTC/ZDOTC +template +cublasStatus_t cublasXdotc(const size_t n, + T* dot_buffer, const size_t dot_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc, + const T* y_buffer, const size_t y_offset, const size_t y_inc); +template <> +cublasStatus_t cublasXdotc(const size_t n, + float2* dot_buffer, const size_t dot_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCdotc(handle, static_cast(n), + &dot_buffer[dot_offset], + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXdotc(const size_t n, + double2* dot_buffer, const size_t dot_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZdotc(handle, static_cast(n), + &dot_buffer[dot_offset], + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2 +template +cublasStatus_t cublasXnrm2(const size_t n, + T* nrm2_buffer, const size_t nrm2_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXnrm2(const size_t n, + float* nrm2_buffer, const size_t nrm2_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSnrm2(handle, static_cast(n), + &nrm2_buffer[nrm2_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t 
cublasXnrm2(const size_t n, + double* nrm2_buffer, const size_t nrm2_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDnrm2(handle, static_cast(n), + &nrm2_buffer[nrm2_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2(const size_t n, + float2* nrm2_buffer, const size_t nrm2_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasScnrm2(handle, static_cast(n), + &nrm2_buffer[nrm2_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2(const size_t n, + double2* nrm2_buffer, const size_t nrm2_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDznrm2(handle, static_cast(n), + &nrm2_buffer[nrm2_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXnrm2(const size_t n, + half* nrm2_buffer, const size_t nrm2_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SASUM/DASUM/ScASUM/DzASUM +template +cublasStatus_t cublasXasum(const size_t n, + T* asum_buffer, const size_t asum_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXasum(const size_t n, + float* asum_buffer, const size_t asum_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasSasum(handle, static_cast(n), + &asum_buffer[asum_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum(const size_t n, + double* asum_buffer, const size_t 
asum_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDasum(handle, static_cast(n), + &asum_buffer[asum_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum(const size_t n, + float2* asum_buffer, const size_t asum_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasScasum(handle, static_cast(n), + &asum_buffer[asum_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum(const size_t n, + double2* asum_buffer, const size_t asum_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDzasum(handle, static_cast(n), + &asum_buffer[asum_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXasum(const size_t n, + half* asum_buffer, const size_t asum_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX +template +cublasStatus_t cublasXamax(const size_t n, + T* imax_buffer, const size_t imax_offset, + const T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXamax(const size_t n, + float* imax_buffer, const size_t imax_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasiSamax(handle, static_cast(n), + &imax_buffer[imax_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax(const size_t n, + double* imax_buffer, const size_t imax_offset, + const double* x_buffer, const size_t 
x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasiDamax(handle, static_cast(n), + &imax_buffer[imax_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax(const size_t n, + float2* imax_buffer, const size_t imax_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasiCamax(handle, static_cast(n), + &imax_buffer[imax_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax(const size_t n, + double2* imax_buffer, const size_t imax_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasiZamax(handle, static_cast(n), + &imax_buffer[imax_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXamax(const size_t n, + half* imax_buffer, const size_t imax_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines +// ================================================================================================= + +// Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV +cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSgemv(handle, layout, a_transpose, + static_cast(m), static_cast(n), + &alpha, + 
&a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDgemv(handle, layout, a_transpose, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCgemv(handle, layout, a_transpose, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_float2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + 
auto status = cublasZgemv(handle, layout, a_transpose, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_double2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV +cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSgbmv(handle, layout, a_transpose, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = 
cublasDgbmv(handle, layout, a_transpose, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasCgbmv(handle, layout, a_transpose, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_float2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZgbmv(handle, layout, a_transpose, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_double2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const 
cublasOperation_t a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHEMV/ZHEMV +cublasStatus_t cublasXhemv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasChemv(handle, layout, triangle, + static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_float2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhemv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZhemv(handle, layout, triangle, + static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_double2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHBMV/ZHBMV +cublasStatus_t cublasXhbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, 
const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasChbmv(handle, layout, triangle, + static_cast(n), static_cast(k), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_float2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZhbmv(handle, layout, triangle, + static_cast(n), static_cast(k), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &cl_double2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPMV/ZHPMV +cublasStatus_t cublasXhpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* ap_buffer, const size_t ap_offset, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2 beta, + float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasChpmv(handle, layout, triangle, + static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc), + &cl_float2{{beta.real(), 
beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* ap_buffer, const size_t ap_offset, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2 beta, + double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasZhpmv(handle, layout, triangle, + static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc), + &cl_double2{{beta.real(), beta.imag()}}, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYMV/DSYMV +cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSsymv(handle, layout, triangle, + static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDsymv(handle, layout, triangle, + static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], 
static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSBMV/DSBMV +cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSsbmv(handle, layout, triangle, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDsbmv(handle, layout, triangle, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t 
n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPMV/DSPMV +cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* ap_buffer, const size_t ap_offset, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float beta, + float* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasSspmv(handle, layout, triangle, + static_cast(n), + &alpha, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* ap_buffer, const size_t ap_offset, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double beta, + double* y_buffer, const size_t y_offset, const size_t y_inc) { + cublasHandle_t handle; + auto status = cublasDspmv(handle, layout, triangle, + static_cast(n), + &alpha, + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc), + &beta, + &y_buffer[y_offset], static_cast(y_inc)); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* ap_buffer, const size_t ap_offset, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + half* y_buffer, const size_t y_offset, const size_t y_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for 
STRMV/DTRMV/CTRMV/ZTRMV +template +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStrmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtrmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtrmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + 
cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtrmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV +template +cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStbmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t 
cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtbmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtbmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtbmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const half* 
a_buffer, const size_t a_offset, const size_t a_ld, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV +template +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* ap_buffer, const size_t ap_offset, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* ap_buffer, const size_t ap_offset, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStpmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* ap_buffer, const size_t ap_offset, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtpmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* ap_buffer, const size_t ap_offset, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtpmv(handle, layout, triangle, 
a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* ap_buffer, const size_t ap_offset, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtpmv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const half* ap_buffer, const size_t ap_offset, + half* x_buffer, const size_t x_offset, const size_t x_inc) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV +template +cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* a_buffer, const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStrsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} 
+template <> +cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtrsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtrsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtrsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV +template +cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const T* a_buffer, 
const size_t a_offset, const size_t a_ld, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStbsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtbsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtbsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t 
triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, const size_t k, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtbsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), static_cast(k), + &a_buffer[a_offset], a_ld, + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV +template +cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const T* ap_buffer, const size_t ap_offset, + T* x_buffer, const size_t x_offset, const size_t x_inc); +template <> +cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float* ap_buffer, const size_t ap_offset, + float* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasStpsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double* ap_buffer, const size_t ap_offset, + double* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasDtpsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t 
cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const float2* ap_buffer, const size_t ap_offset, + float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasCtpsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} +template <> +cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t n, + const double2* ap_buffer, const size_t ap_offset, + double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cublasHandle_t handle; + auto status = cublasZtpsv(handle, layout, triangle, a_transpose, diagonal, + static_cast(n), + &ap_buffer[ap_offset], + &x_buffer[x_offset], static_cast(x_inc)); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SGER/DGER +cublasStatus_t cublasXger(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasSger(handle, layout, + static_cast(m), static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXger(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* a_buffer, const size_t 
a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasDger(handle, layout, + static_cast(m), static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXger(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* a_buffer, const size_t a_offset, const size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CGERU/ZGERU +cublasStatus_t cublasXgeru(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasCgeru(handle, layout, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgeru(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasZgeru(handle, layout, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls 
for CGERC/ZGERC +cublasStatus_t cublasXgerc(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasCgerc(handle, layout, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgerc(const cublas_has_no_layout layout, + const size_t m, const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasZgerc(handle, layout, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHER/ZHER +cublasStatus_t cublasXher(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasCher(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXher(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double2* x_buffer, const size_t 
x_offset, const size_t x_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasZher(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPR/ZHPR +cublasStatus_t cublasXhpr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + float2* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasChpr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + double2* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasZhpr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHER2/ZHER2 +cublasStatus_t cublasXher2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasCher2(handle, layout, triangle, + static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} 
+cublasStatus_t cublasXher2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasZher2(handle, layout, triangle, + static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for CHPR2/ZHPR2 +cublasStatus_t cublasXhpr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float2 alpha, + const float2* x_buffer, const size_t x_offset, const size_t x_inc, + const float2* y_buffer, const size_t y_offset, const size_t y_inc, + float2* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasChpr2(handle, layout, triangle, + static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhpr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double2 alpha, + const double2* x_buffer, const size_t x_offset, const size_t x_inc, + const double2* y_buffer, const size_t y_offset, const size_t y_inc, + double2* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasZhpr2(handle, layout, triangle, + static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYR/DSYR +cublasStatus_t 
cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasSsyr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasDsyr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* a_buffer, const size_t a_offset, const size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPR/DSPR +cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + float* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasSspr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + double* 
ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasDspr(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + half* ap_buffer, const size_t ap_offset) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSYR2/DSYR2 +cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasSsyr2(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* a_buffer, const size_t a_offset, const size_t a_ld) { + cublasHandle_t handle; + auto status = cublasDsyr2(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &a_buffer[a_offset], a_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, 
+ const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* a_buffer, const size_t a_offset, const size_t a_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSPR2/DSPR2 +cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const float alpha, + const float* x_buffer, const size_t x_offset, const size_t x_inc, + const float* y_buffer, const size_t y_offset, const size_t y_inc, + float* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasSspr2(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const double alpha, + const double* x_buffer, const size_t x_offset, const size_t x_inc, + const double* y_buffer, const size_t y_offset, const size_t y_inc, + double* ap_buffer, const size_t ap_offset) { + cublasHandle_t handle; + auto status = cublasDspr2(handle, layout, triangle, + static_cast(n), + &alpha, + &x_buffer[x_offset], static_cast(x_inc), + &y_buffer[y_offset], static_cast(y_inc), + &ap_buffer[ap_offset]); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, + const size_t n, + const half alpha, + const half* x_buffer, const size_t x_offset, const size_t x_inc, + const half* y_buffer, const size_t y_offset, const size_t y_inc, + half* ap_buffer, const size_t ap_offset) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines +// ================================================================================================= + +// 
Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM +cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasSgemm(handle, layout, a_transpose, b_transpose, + static_cast(m), static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasDgemm(handle, layout, a_transpose, b_transpose, + static_cast(m), static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCgemm(handle, 
layout, a_transpose, b_transpose, + static_cast(m), static_cast(n), static_cast(k), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_float2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZgemm(handle, layout, a_transpose, b_transpose, + static_cast(m), static_cast(n), static_cast(k), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_double2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, + const size_t m, const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM +cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t 
c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasSsymm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasDsymm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCsymm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_float2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const 
size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZsymm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_double2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHEMM/ZHEMM +cublasStatus_t cublasXhemm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasChemm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_float2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXhemm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t 
b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZhemm(handle, layout, side, triangle, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_double2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK +cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasSsyrk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasDsyrk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const 
size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCsyrk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &cl_float2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZsyrk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &cl_double2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHERK/ZHERK +cublasStatus_t cublasXherk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const float alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCherk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return 
status; +} +cublasStatus_t cublasXherk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, + const size_t n, const size_t k, + const double alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZherk(handle, layout, triangle, a_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K +cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + const float* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasSsyr2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + const double* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasDsyr2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], 
c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float2 beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCsyr2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_float2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double2 beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZsyr2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &cl_double2{{beta.real(), beta.imag()}}, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + const half* b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + half* c_buffer, const size_t c_offset, const size_t c_ld) { + return 
CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for CHER2K/ZHER2K +cublasStatus_t cublasXher2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + const float2* b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + float2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasCher2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXher2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, + const size_t n, const size_t k, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + const double2* b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + double2* c_buffer, const size_t c_offset, const size_t c_ld) { + cublasHandle_t handle; + auto status = cublasZher2k(handle, layout, triangle, ab_transpose, + static_cast(n), static_cast(k), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld, + &beta, + &c_buffer[c_offset], c_ld); + cublasDestroy(handle); + return status; +} + +// Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM +cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t 
handle; + auto status = cublasStrmm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasDtrmm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasCtrmm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasZtrmm(handle, 
layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const half alpha, + const half* a_buffer, const size_t a_offset, const size_t a_ld, + half* b_buffer, const size_t b_offset, const size_t b_ld) { + return CUBLAS_STATUS_NOT_SUPPORTED; +} + +// Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM +cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float alpha, + const float* a_buffer, const size_t a_offset, const size_t a_ld, + float* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasStrsm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double alpha, + const double* a_buffer, const size_t a_offset, const size_t a_ld, + double* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasDtrsm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &alpha, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t 
cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const float2 alpha, + const float2* a_buffer, const size_t a_offset, const size_t a_ld, + float2* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasCtrsm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &cl_float2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} +cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, + const size_t m, const size_t n, + const double2 alpha, + const double2* a_buffer, const size_t a_offset, const size_t a_ld, + double2* b_buffer, const size_t b_offset, const size_t b_ld) { + cublasHandle_t handle; + auto status = cublasZtrsm(handle, layout, side, triangle, a_transpose, diagonal, + static_cast(m), static_cast(n), + &cl_double2{{alpha.real(), alpha.imag()}}, + &a_buffer[a_offset], a_ld, + &b_buffer[b_offset], b_ld); + cublasDestroy(handle); + return status; +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_WRAPPER_CUBLAS_H_ +#endif -- cgit v1.2.3 From dbe22b5bf3da02a2d94280361cddde1f8f66b63f Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 6 Apr 2017 19:40:51 +0200 Subject: Fixed some size_t to int conversion warnings for the CBLAS interface --- test/wrapper_cblas.hpp | 268 ++++++++++++++++++++++++------------------------- 1 file changed, 134 insertions(+), 134 deletions(-) (limited to 'test') diff --git a/test/wrapper_cblas.hpp b/test/wrapper_cblas.hpp index dd610a6c..070d44b5 100644 --- 
a/test/wrapper_cblas.hpp +++ b/test/wrapper_cblas.hpp @@ -94,7 +94,7 @@ void cblasXrot(const size_t n, std::vector& y_buffer, const size_t y_offset, const size_t y_inc, const float cos, const float sin) { - cblas_srot(n, + cblas_srot(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), cos, @@ -105,7 +105,7 @@ void cblasXrot(const size_t n, std::vector& y_buffer, const size_t y_offset, const size_t y_inc, const double cos, const double sin) { - cblas_drot(n, + cblas_drot(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), cos, @@ -117,7 +117,7 @@ void cblasXrotm(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& sparam_buffer, const size_t sparam_offset) { - cblas_srotm(n, + cblas_srotm(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &sparam_buffer[sparam_offset]); @@ -126,7 +126,7 @@ void cblasXrotm(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& sparam_buffer, const size_t sparam_offset) { - cblas_drotm(n, + cblas_drotm(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &sparam_buffer[sparam_offset]); @@ -136,28 +136,28 @@ void cblasXrotm(const size_t n, void cblasXswap(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_sswap(n, + cblas_sswap(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } void cblasXswap(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_dswap(n, + cblas_dswap(static_cast(n), 
&x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } void cblasXswap(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cswap(n, + cblas_cswap(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } void cblasXswap(const size_t n, std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zswap(n, + cblas_zswap(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } @@ -177,14 +177,14 @@ void cblasXswap(const size_t n, void cblasXscal(const size_t n, const float alpha, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - cblas_sscal(n, + cblas_sscal(static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc)); } void cblasXscal(const size_t n, const double alpha, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - cblas_dscal(n, + cblas_dscal(static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc)); } @@ -192,7 +192,7 @@ void cblasXscal(const size_t n, const float2 alpha, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; - cblas_cscal(n, + cblas_cscal(static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -200,7 +200,7 @@ void cblasXscal(const size_t n, const double2 alpha, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; - cblas_zscal(n, + cblas_zscal(static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -218,28 +218,28 @@ void cblasXscal(const size_t n, void cblasXcopy(const size_t n, 
const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_scopy(n, + cblas_scopy(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } void cblasXcopy(const size_t n, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_dcopy(n, + cblas_dcopy(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } void cblasXcopy(const size_t n, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_ccopy(n, + cblas_ccopy(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } void cblasXcopy(const size_t n, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zcopy(n, + cblas_zcopy(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } @@ -259,7 +259,7 @@ void cblasXaxpy(const size_t n, const float alpha, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_saxpy(n, + cblas_saxpy(static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -268,7 +268,7 @@ void cblasXaxpy(const size_t n, const double alpha, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_daxpy(n, + cblas_daxpy(static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -278,7 +278,7 @@ void cblasXaxpy(const 
size_t n, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; - cblas_caxpy(n, + cblas_caxpy(static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -288,7 +288,7 @@ void cblasXaxpy(const size_t n, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; - cblas_zaxpy(n, + cblas_zaxpy(static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -311,7 +311,7 @@ void cblasXdot(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - dot_buffer[dot_offset] = cblas_sdot(n, + dot_buffer[dot_offset] = cblas_sdot(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } @@ -319,7 +319,7 @@ void cblasXdot(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - dot_buffer[dot_offset] = cblas_ddot(n, + dot_buffer[dot_offset] = cblas_ddot(static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } @@ -342,7 +342,7 @@ void cblasXdotu(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cdotu_sub(n, + 
cblas_cdotu_sub(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); @@ -351,7 +351,7 @@ void cblasXdotu(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zdotu_sub(n, + cblas_zdotu_sub(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); @@ -362,7 +362,7 @@ void cblasXdotc(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_cdotc_sub(n, + cblas_cdotc_sub(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); @@ -371,7 +371,7 @@ void cblasXdotc(const size_t n, std::vector& dot_buffer, const size_t dot_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { - cblas_zdotc_sub(n, + cblas_zdotc_sub(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); @@ -381,25 +381,25 @@ void cblasXdotc(const size_t n, void cblasXnrm2(const size_t n, std::vector& nrm2_buffer, const size_t nrm2_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset] = cblas_snrm2(n, + nrm2_buffer[nrm2_offset] = cblas_snrm2(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXnrm2(const size_t n, std::vector& 
nrm2_buffer, const size_t nrm2_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset] = cblas_dnrm2(n, + nrm2_buffer[nrm2_offset] = cblas_dnrm2(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXnrm2(const size_t n, std::vector& nrm2_buffer, const size_t nrm2_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset].real(cblas_scnrm2(n, + nrm2_buffer[nrm2_offset].real(cblas_scnrm2(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } void cblasXnrm2(const size_t n, std::vector& nrm2_buffer, const size_t nrm2_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n, + nrm2_buffer[nrm2_offset].real(cblas_dznrm2(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } void cblasXnrm2(const size_t n, @@ -417,25 +417,25 @@ void cblasXnrm2(const size_t n, void cblasXasum(const size_t n, std::vector& asum_buffer, const size_t asum_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset] = cblas_sasum(n, + asum_buffer[asum_offset] = cblas_sasum(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXasum(const size_t n, std::vector& asum_buffer, const size_t asum_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset] = cblas_dasum(n, + asum_buffer[asum_offset] = cblas_dasum(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXasum(const size_t n, std::vector& asum_buffer, const size_t asum_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset].real(cblas_scasum(n, + asum_buffer[asum_offset].real(cblas_scasum(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } void cblasXasum(const size_t n, std::vector& asum_buffer, 
const size_t asum_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - asum_buffer[asum_offset].real(cblas_dzasum(n, + asum_buffer[asum_offset].real(cblas_dzasum(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } void cblasXasum(const size_t n, @@ -453,25 +453,25 @@ void cblasXasum(const size_t n, void cblasXamax(const size_t n, std::vector& imax_buffer, const size_t imax_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXamax(const size_t n, std::vector& imax_buffer, const size_t imax_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(static_cast(n), &x_buffer[x_offset], static_cast(x_inc)); } void cblasXamax(const size_t n, std::vector& imax_buffer, const size_t imax_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } void cblasXamax(const size_t n, std::vector& imax_buffer, const size_t imax_offset, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { - ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n, + ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } void cblasXamax(const size_t n, @@ -498,7 +498,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const float beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sgemv(layout, a_transpose, - m, n, + static_cast(m), static_cast(n), 
alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -513,7 +513,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const double beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dgemv(layout, a_transpose, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -530,7 +530,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_cgemv(layout, a_transpose, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -547,7 +547,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zgemv(layout, a_transpose, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -583,7 +583,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const float beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -598,7 +598,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const double beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], 
static_cast(x_inc), @@ -615,7 +615,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_cgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -632,7 +632,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zgbmv(layout, a_transpose, - m, n, kl, ku, + static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -670,7 +670,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_chemv(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -687,7 +687,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zhemv(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -706,7 +706,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_chbmv(layout, triangle, - n, k, 
+ static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -723,7 +723,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zhbmv(layout, triangle, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -742,7 +742,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_chpmv(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -759,7 +759,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zhpmv(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -776,7 +776,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_ssymv(layout, triangle, - n, + static_cast(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -791,7 +791,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dsymv(layout, triangle, - n, + static_cast(n), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -827,7 +827,7 @@ void 
cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_ssbmv(layout, triangle, - n, k, + static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -842,7 +842,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dsbmv(layout, triangle, - n, k, + static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc), @@ -878,7 +878,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const float beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_sspmv(layout, triangle, - n, + static_cast(n), alpha, &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc), @@ -893,7 +893,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const double beta, std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { cblas_dspmv(layout, triangle, - n, + static_cast(n), alpha, &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc), @@ -926,7 +926,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_strmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -935,7 +935,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ 
-944,7 +944,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -953,7 +953,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztrmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -976,7 +976,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -985,7 +985,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -994,7 +994,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), 
reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1003,7 +1003,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztbmv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1026,7 +1026,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); } @@ -1035,7 +1035,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); } @@ -1044,7 +1044,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1053,7 +1053,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztpmv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), 
reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1076,7 +1076,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_strsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -1085,7 +1085,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -1094,7 +1094,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1103,7 +1103,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztrsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1114,7 +1114,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stbsv(layout, 
triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -1123,7 +1123,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_dtbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); } @@ -1132,7 +1132,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1141,7 +1141,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztbsv(layout, triangle, a_transpose, diagonal, - n, k, + static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1152,7 +1152,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_stpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); } @@ -1161,7 +1161,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const 
size_t x_offset, const size_t x_inc) { cblas_dtpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); } @@ -1170,7 +1170,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ctpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1179,7 +1179,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const std::vector& ap_buffer, const size_t ap_offset, std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { cblas_ztpsv(layout, triangle, a_transpose, diagonal, - n, + static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } @@ -1192,7 +1192,7 @@ void cblasXger(const CBLAS_ORDER layout, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_sger(layout, - m, n, + static_cast(m), static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1205,7 +1205,7 @@ void cblasXger(const CBLAS_ORDER layout, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dger(layout, - m, n, + static_cast(m), static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1238,7 +1238,7 @@ void cblasXgeru(const CBLAS_ORDER layout, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_cgeru(layout, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), 
reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1252,7 +1252,7 @@ void cblasXgeru(const CBLAS_ORDER layout, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_zgeru(layout, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1268,7 +1268,7 @@ void cblasXgerc(const CBLAS_ORDER layout, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_cgerc(layout, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1282,7 +1282,7 @@ void cblasXgerc(const CBLAS_ORDER layout, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_zgerc(layout, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1296,7 +1296,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_cher(layout, triangle, - n, + static_cast(n), alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); @@ -1307,7 +1307,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_zher(layout, triangle, - n, + static_cast(n), alpha, 
reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); @@ -1320,7 +1320,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_chpr(layout, triangle, - n, + static_cast(n), alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&ap_buffer[ap_offset])); @@ -1331,7 +1331,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_zhpr(layout, triangle, - n, + static_cast(n), alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&ap_buffer[ap_offset])); @@ -1346,7 +1346,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_cher2(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1360,7 +1360,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_zher2(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1376,7 +1376,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector& ap_buffer, const size_t ap_offset) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_chpr2(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), 
static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1390,7 +1390,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, std::vector& ap_buffer, const size_t ap_offset) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_zhpr2(layout, triangle, - n, + static_cast(n), alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -1404,7 +1404,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_ssyr(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &a_buffer[a_offset], a_ld); @@ -1415,7 +1415,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dsyr(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &a_buffer[a_offset], a_ld); @@ -1442,7 +1442,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_sspr(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &ap_buffer[ap_offset]); @@ -1453,7 +1453,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_dspr(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &ap_buffer[ap_offset]); @@ -1481,7 +1481,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& y_buffer, const size_t 
y_offset, const size_t y_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_ssyr2(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1494,7 +1494,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { cblas_dsyr2(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1526,7 +1526,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_sspr2(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1539,7 +1539,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, std::vector& ap_buffer, const size_t ap_offset) { cblas_dspr2(layout, triangle, - n, + static_cast(n), alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1576,7 +1576,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const float beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_sgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast(m), static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1591,7 +1591,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const double beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast(m), static_cast(n), static_cast(k), alpha, 
&a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1608,7 +1608,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_cgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast(m), static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1625,7 +1625,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zgemm(layout, a_transpose, b_transpose, - m, n, k, + static_cast(m), static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1661,7 +1661,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const float beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssymm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1676,7 +1676,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const double beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsymm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1693,7 +1693,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_csymm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, 
reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1710,7 +1710,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zsymm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1748,7 +1748,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_chemm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1765,7 +1765,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zhemm(layout, side, triangle, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1781,7 +1781,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const float beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssyrk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, beta, @@ -1794,7 +1794,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const double beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsyrk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, beta, @@ -1809,7 +1809,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, 
const CBLAS_UPLO triangle, const CBLAS const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_csyrk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, beta_array.data(), @@ -1824,7 +1824,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zsyrk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, beta_array.data(), @@ -1855,7 +1855,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const float beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_cherk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha, reinterpret_cast(&a_buffer[a_offset]), a_ld, beta, @@ -1868,7 +1868,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS const double beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_zherk(layout, triangle, a_transpose, - n, k, + static_cast(n), static_cast(k), alpha, reinterpret_cast(&a_buffer[a_offset]), a_ld, beta, @@ -1884,7 +1884,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const float beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_ssyr2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), static_cast(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1899,7 +1899,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const double beta, std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { cblas_dsyr2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), 
static_cast(k), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld, @@ -1916,7 +1916,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_csyr2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1933,7 +1933,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; const auto beta_array = std::vector{beta.real(), beta.imag()}; cblas_zsyr2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1970,7 +1970,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_cher2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -1986,7 +1986,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_zher2k(layout, triangle, ab_transpose, - n, k, + static_cast(n), static_cast(k), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld, @@ -2001,7 +2001,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& b_buffer, 
const size_t b_offset, const size_t b_ld) { cblas_strmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2012,7 +2012,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_dtrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2024,7 +2024,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_ctrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); @@ -2036,7 +2036,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_ztrmm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); @@ -2063,7 +2063,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_strsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2074,7 +2074,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, 
const CBLAS_UPL const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { cblas_dtrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); @@ -2086,7 +2086,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_ctrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); @@ -2098,7 +2098,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { const auto alpha_array = std::vector{alpha.real(), alpha.imag()}; cblas_ztrsm(layout, side, triangle, a_transpose, diagonal, - m, n, + static_cast(m), static_cast(n), alpha_array.data(), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); -- cgit v1.2.3 From 52dd7433caac3f30b6c02ed299ec1b16dc7614ea Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 6 Apr 2017 20:56:28 +0200 Subject: Completed the cuBLAS wrapper --- scripts/generator/generator.py | 2 +- scripts/generator/generator/convert.py | 2 +- scripts/generator/generator/cpp.py | 16 +- scripts/generator/generator/datatype.py | 5 + scripts/generator/generator/routine.py | 57 +- test/wrapper_cublas.hpp | 1252 +++++++++++++++++++------------ 6 files changed, 817 insertions(+), 517 deletions(-) (limited to 'test') diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 3f3fab62..8810397c 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -42,7 +42,7 @@ FILES = [ "/include/clblast_netlib_c.h", 
"/src/clblast_netlib_c.cpp", ] -HEADER_LINES = [123, 76, 126, 23, 29, 41, 29, 65, 32] +HEADER_LINES = [122, 77, 126, 23, 29, 41, 29, 65, 32] FOOTER_LINES = [25, 138, 27, 38, 6, 6, 6, 9, 2] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 diff --git a/scripts/generator/generator/convert.py b/scripts/generator/generator/convert.py index 80b6f338..07f45669 100644 --- a/scripts/generator/generator/convert.py +++ b/scripts/generator/generator/convert.py @@ -59,7 +59,7 @@ def option_to_cblas(x): def option_to_cublas(x): """As above, but for clBLAS data-types""" return { - 'layout': "cublas_has_no_layout", + 'layout': "Layout", 'a_transpose': "cublasOperation_t", 'b_transpose': "cublasOperation_t", 'ab_transpose': "cublasOperation_t", diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 49240095..7c695dc8 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -304,8 +304,22 @@ def wrapper_cublas(routine): if flavour.precision_name in ["S", "D", "C", "Z"]: indent = " " * (24 + routine.length()) arguments = routine.arguments_wrapper_cublas(flavour) + + # Handles row-major + if routine.has_layout(): + result += " if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }" + NL + + # Complex scalars + for scalar in routine.scalars: + if flavour.is_complex(scalar): + cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex" + result += " " + cuda_complex + " " + scalar + "_cuda;" + NL + result += " " + scalar + "_cuda.x = " + scalar + ".real();" + NL + result += " " + scalar + "_cuda.y = " + scalar + ".imag();" + NL + + # Calls the cuBLAS routine result += " cublasHandle_t handle;" + NL - result += " auto status = cublas" + flavour.name + routine.name + "(handle, " + result += " auto status = cublas" + flavour.name_cublas() + routine.name + "(handle, " result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL result += " cublasDestroy(handle);" + NL result 
+= " return status;" diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py index cab2411a..6ac5681a 100644 --- a/scripts/generator/generator/datatype.py +++ b/scripts/generator/generator/datatype.py @@ -87,6 +87,11 @@ class DataType: """Current type is of a non-standard type""" return self.buffer_type in [D_HALF, D_FLOAT2, D_DOUBLE2] + def name_cublas(self): + if "i" in self.name: + return "I" + self.name[1].lower() + return self.name + # Regular data-types H = DataType("H", "H", D_HALF, [D_HALF] * 2 + [D_HALF_OPENCL] * 2, D_HALF) # half (16) diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index 9414eb50..b1db484f 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -197,6 +197,10 @@ class Routine: """Determines whether or not this routine has scalar arguments (alpha/beta)""" return self.scalars == [] + def has_layout(self): + """Determines whether the layout is an argument""" + return "layout" in self.options + def short_names(self): """Returns the upper-case names of these routines (all flavours)""" return "/".join([f.name + self.upper_name() for f in self.flavours]) @@ -339,10 +343,16 @@ class Routine: return [", ".join(a + c)] return [] - def buffer_wrapper_cublas(self, name): + def buffer_wrapper_cublas(self, name, flavour): """As above but for cuBLAS the wrapper""" + prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: - a = ["&" + name + "_buffer[" + name + "_offset]"] + if flavour.precision_name in ["C", "Z"]: + cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex" + a = ["reinterpret_cast<" + prefix + cuda_complex + "*>" + + "(&" + name + "_buffer[" + name + "_offset])"] + else: + a = ["&" + name + "_buffer[" + name + "_offset]"] c = [] if name in ["x", "y"]: c = ["static_cast(" + name + "_" + self.postfix(name) + ")"] @@ -421,16 +431,6 @@ class Routine: 
return [name] return [] - def scalar_use_wrapper_by_ref(self, name, flavour): - """As above, but for the cuBLAS wrapper""" - if name in self.scalars: - if name == "alpha": - return ["&" + flavour.use_alpha_opencl()] - elif name == "beta": - return ["&" + flavour.use_beta_opencl()] - return [name] - return [] - def scalar_use_wrapper_cblas(self, name, flavour): """As above, but for the CBLAS wrapper""" if name in self.scalars: @@ -439,6 +439,14 @@ class Routine: return [name] return [] + def scalar_use_wrapper_cublas(self, name, flavour): + """As above, but for the cuBLAS wrapper""" + if name in self.scalars: + if flavour.is_complex(name): + return ["&" + name + "_cuda"] + return ["&" + name] + return [] + def scalar_def(self, name, flavour): """Retrieves the definition of a scalar (alpha/beta)""" if name in self.scalars: @@ -534,6 +542,15 @@ class Routine: return [", ".join(self.options)] return [] + def options_list_no_layout(self): + """Retrieves a list of options""" + options = self.options[:] + if "layout" in options: + options.remove("layout") + if options: + return [", ".join(options)] + return [] + def options_cast(self, indent): """As above, but now casted to CLBlast data-types""" if self.options: @@ -670,14 +687,14 @@ class Routine: def arguments_wrapper_cublas(self, flavour): """As above, but for the cuBLAS wrapper""" - return (self.options_list() + self.sizes_list_as_int() + - list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_first()])) + - self.scalar_use_wrapper_by_ref("alpha", flavour) + - list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_first()])) + - self.scalar_use_wrapper_by_ref("beta", flavour) + - list(chain(*[self.buffer_wrapper_cublas(b) for b in self.buffers_second()])) + - list(chain(*[self.buffer_wrapper_cublas(b) for b in self.scalar_buffers_second()])) + - list(chain(*[self.scalar_use_wrapper_by_ref(s, flavour) for s in self.other_scalars()]))) + return (self.options_list_no_layout() + 
self.sizes_list_as_int() + + self.scalar_use_wrapper_cublas("alpha", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_first()])) + + self.scalar_use_wrapper_cublas("beta", flavour) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.buffers_second()])) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_first()])) + + list(chain(*[self.buffer_wrapper_cublas(b, flavour) for b in self.scalar_buffers_second()])) + + list(chain(*[self.scalar_use_wrapper_cublas(s, flavour) for s in self.other_scalars()]))) def arguments_def(self, flavour): """Retrieves a combination of all the argument definitions""" diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp index a0e274f0..22eb3971 100644 --- a/test/wrapper_cublas.hpp +++ b/test/wrapper_cublas.hpp @@ -113,8 +113,8 @@ cublasStatus_t cublasXrot(const size_t n, auto status = cublasSrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), - cos, - sin); + &cos, + &sin); cublasDestroy(handle); return status; } @@ -127,8 +127,8 @@ cublasStatus_t cublasXrot(const size_t n, auto status = cublasDrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), - cos, - sin); + &cos, + &sin); cublasDestroy(handle); return status; } @@ -199,8 +199,8 @@ cublasStatus_t cublasXswap(const size_t n, float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasCswap(handle, static_cast(n), - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -210,8 +210,8 @@ cublasStatus_t cublasXswap(const size_t n, double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = 
cublasZswap(handle, static_cast(n), - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -246,20 +246,26 @@ cublasStatus_t cublasXscal(const size_t n, cublasStatus_t cublasXscal(const size_t n, const float2 alpha, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; auto status = cublasCscal(handle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc)); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } cublasStatus_t cublasXscal(const size_t n, const double2 alpha, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; auto status = cublasZscal(handle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc)); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } @@ -302,8 +308,8 @@ cublasStatus_t cublasXcopy(const size_t n, float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasCcopy(handle, static_cast(n), - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -313,8 +319,8 @@ cublasStatus_t cublasXcopy(const size_t n, double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasZcopy(handle, static_cast(n), - 
&x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -354,11 +360,14 @@ cublasStatus_t cublasXaxpy(const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; auto status = cublasCaxpy(handle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -366,11 +375,14 @@ cublasStatus_t cublasXaxpy(const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* y_buffer, const size_t y_offset, const size_t y_inc) { + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; auto status = cublasZaxpy(handle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } @@ -394,9 +406,9 @@ cublasStatus_t cublasXdot(const size_t n, const float* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasSdot(handle, static_cast(n), - &dot_buffer[dot_offset], &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + &y_buffer[y_offset], static_cast(y_inc), + 
&dot_buffer[dot_offset]); cublasDestroy(handle); return status; } @@ -407,9 +419,9 @@ cublasStatus_t cublasXdot(const size_t n, const double* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasDdot(handle, static_cast(n), - &dot_buffer[dot_offset], &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + &y_buffer[y_offset], static_cast(y_inc), + &dot_buffer[dot_offset]); cublasDestroy(handle); return status; } @@ -434,9 +446,9 @@ cublasStatus_t cublasXdotu(const size_t n, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasCdotu(handle, static_cast(n), - &dot_buffer[dot_offset], - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&dot_buffer[dot_offset])); cublasDestroy(handle); return status; } @@ -447,9 +459,9 @@ cublasStatus_t cublasXdotu(const size_t n, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasZdotu(handle, static_cast(n), - &dot_buffer[dot_offset], - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&dot_buffer[dot_offset])); cublasDestroy(handle); return status; } @@ -467,9 +479,9 @@ cublasStatus_t cublasXdotc(const size_t n, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasCdotc(handle, static_cast(n), - &dot_buffer[dot_offset], - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + 
reinterpret_cast(&dot_buffer[dot_offset])); cublasDestroy(handle); return status; } @@ -480,9 +492,9 @@ cublasStatus_t cublasXdotc(const size_t n, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; auto status = cublasZdotc(handle, static_cast(n), - &dot_buffer[dot_offset], - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&dot_buffer[dot_offset])); cublasDestroy(handle); return status; } @@ -498,8 +510,8 @@ cublasStatus_t cublasXnrm2(const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasSnrm2(handle, static_cast(n), - &nrm2_buffer[nrm2_offset], - &x_buffer[x_offset], static_cast(x_inc)); + &x_buffer[x_offset], static_cast(x_inc), + &nrm2_buffer[nrm2_offset]); cublasDestroy(handle); return status; } @@ -509,8 +521,8 @@ cublasStatus_t cublasXnrm2(const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasDnrm2(handle, static_cast(n), - &nrm2_buffer[nrm2_offset], - &x_buffer[x_offset], static_cast(x_inc)); + &x_buffer[x_offset], static_cast(x_inc), + &nrm2_buffer[nrm2_offset]); cublasDestroy(handle); return status; } @@ -520,8 +532,8 @@ cublasStatus_t cublasXnrm2(const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasScnrm2(handle, static_cast(n), - &nrm2_buffer[nrm2_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&nrm2_buffer[nrm2_offset])); cublasDestroy(handle); return status; } @@ -531,8 +543,8 @@ cublasStatus_t cublasXnrm2(const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = 
cublasDznrm2(handle, static_cast(n), - &nrm2_buffer[nrm2_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&nrm2_buffer[nrm2_offset])); cublasDestroy(handle); return status; } @@ -554,8 +566,8 @@ cublasStatus_t cublasXasum(const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasSasum(handle, static_cast(n), - &asum_buffer[asum_offset], - &x_buffer[x_offset], static_cast(x_inc)); + &x_buffer[x_offset], static_cast(x_inc), + &asum_buffer[asum_offset]); cublasDestroy(handle); return status; } @@ -565,8 +577,8 @@ cublasStatus_t cublasXasum(const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasDasum(handle, static_cast(n), - &asum_buffer[asum_offset], - &x_buffer[x_offset], static_cast(x_inc)); + &x_buffer[x_offset], static_cast(x_inc), + &asum_buffer[asum_offset]); cublasDestroy(handle); return status; } @@ -576,8 +588,8 @@ cublasStatus_t cublasXasum(const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasScasum(handle, static_cast(n), - &asum_buffer[asum_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&asum_buffer[asum_offset])); cublasDestroy(handle); return status; } @@ -587,8 +599,8 @@ cublasStatus_t cublasXasum(const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; auto status = cublasDzasum(handle, static_cast(n), - &asum_buffer[asum_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&asum_buffer[asum_offset])); cublasDestroy(handle); return status; } @@ -609,9 +621,9 @@ cublasStatus_t cublasXamax(const size_t n, float* imax_buffer, const size_t 
imax_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; - auto status = cublasiSamax(handle, static_cast(n), - &imax_buffer[imax_offset], - &x_buffer[x_offset], static_cast(x_inc)); + auto status = cublasIsamax(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &imax_buffer[imax_offset]); cublasDestroy(handle); return status; } @@ -620,9 +632,9 @@ cublasStatus_t cublasXamax(const size_t n, double* imax_buffer, const size_t imax_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; - auto status = cublasiDamax(handle, static_cast(n), - &imax_buffer[imax_offset], - &x_buffer[x_offset], static_cast(x_inc)); + auto status = cublasIdamax(handle, static_cast(n), + &x_buffer[x_offset], static_cast(x_inc), + &imax_buffer[imax_offset]); cublasDestroy(handle); return status; } @@ -631,9 +643,9 @@ cublasStatus_t cublasXamax(const size_t n, float2* imax_buffer, const size_t imax_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; - auto status = cublasiCamax(handle, static_cast(n), - &imax_buffer[imax_offset], - &x_buffer[x_offset], static_cast(x_inc)); + auto status = cublasIcamax(handle, static_cast(n), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ -642,9 +654,9 @@ cublasStatus_t cublasXamax(const size_t n, double2* imax_buffer, const size_t imax_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; - auto status = cublasiZamax(handle, static_cast(n), - &imax_buffer[imax_offset], - &x_buffer[x_offset], static_cast(x_inc)); + auto status = cublasIzamax(handle, static_cast(n), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ -660,15 +672,16 @@ cublasStatus_t 
cublasXamax(const size_t n, // ================================================================================================= // Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV -cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSgemv(handle, layout, a_transpose, + auto status = cublasSgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -678,15 +691,16 @@ cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDgemv(handle, layout, a_transpose, + auto status = cublasDgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -696,43 +710,57 @@ cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const 
cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasCgemv(handle, layout, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgemv(handle, a_transpose, static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_float2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasZgemv(handle, layout, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + 
beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgemv(handle, a_transpose, static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_double2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -743,15 +771,16 @@ cublasStatus_t cublasXgemv(const cublas_has_no_layout layout, const cublasOperat } // Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV -cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSgbmv(handle, layout, a_transpose, + auto status = cublasSgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, &a_buffer[a_offset], a_ld, @@ -761,15 +790,16 @@ cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } 
-cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDgbmv(handle, layout, a_transpose, + auto status = cublasDgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, &a_buffer[a_offset], a_ld, @@ -779,43 +809,57 @@ cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } -cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasCgbmv(handle, layout, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], 
static_cast(x_inc), - &cl_float2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasZgbmv(handle, layout, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_double2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const 
size_t ku, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -826,129 +870,172 @@ cublasStatus_t cublasXgbmv(const cublas_has_no_layout layout, const cublasOperat } // Forwards the cuBLAS calls for CHEMV/ZHEMV -cublasStatus_t cublasXhemv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasChemv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChemv(handle, triangle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_float2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhemv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = 
cublasZhemv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhemv(handle, triangle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_double2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHBMV/ZHBMV -cublasStatus_t cublasXhbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasChbmv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChbmv(handle, triangle, static_cast(n), static_cast(k), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_float2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, 
+ reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasZhbmv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhbmv(handle, triangle, static_cast(n), static_cast(k), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc), - &cl_double2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHPMV/ZHPMV -cublasStatus_t cublasXhpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* ap_buffer, const size_t ap_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2 beta, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto 
status = cublasChpmv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChpmv(handle, triangle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc), - &cl_float2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* ap_buffer, const size_t ap_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2 beta, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - auto status = cublasZhpmv(handle, layout, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhpmv(handle, triangle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc), - &cl_double2{{beta.real(), beta.imag()}}, - &y_buffer[y_offset], static_cast(y_inc)); + &alpha_cuda, + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + &beta_cuda, + 
reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for SSYMV/DSYMV -cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsymv(handle, layout, triangle, + auto status = cublasSsymv(handle, triangle, static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -958,15 +1045,16 @@ cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsymv(handle, layout, triangle, + auto status = cublasDsymv(handle, triangle, static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -976,7 +1064,7 @@ cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const 
half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -987,15 +1075,16 @@ cublasStatus_t cublasXsymv(const cublas_has_no_layout layout, const cublasFillMo } // Forwards the cuBLAS calls for SSBMV/DSBMV -cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsbmv(handle, layout, triangle, + auto status = cublasSsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -1005,15 +1094,16 @@ cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsbmv(handle, layout, triangle, + auto status = cublasDsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -1023,7 +1113,7 @@ cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMode_t 
triangle, +cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1034,15 +1124,16 @@ cublasStatus_t cublasXsbmv(const cublas_has_no_layout layout, const cublasFillMo } // Forwards the cuBLAS calls for SSPMV/DSPMV -cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* ap_buffer, const size_t ap_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSspmv(handle, layout, triangle, + auto status = cublasSspmv(handle, triangle, static_cast(n), &alpha, &ap_buffer[ap_offset], @@ -1052,15 +1143,16 @@ cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* ap_buffer, const size_t ap_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDspmv(handle, layout, triangle, + auto status = cublasDspmv(handle, triangle, static_cast(n), &alpha, &ap_buffer[ap_offset], @@ -1070,7 +1162,7 @@ cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXspmv(const 
cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* ap_buffer, const size_t ap_offset, @@ -1082,17 +1174,18 @@ cublasStatus_t cublasXspmv(const cublas_has_no_layout layout, const cublasFillMo // Forwards the cuBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV template -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStrmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1100,12 +1193,13 @@ cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const 
cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtrmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1113,33 +1207,35 @@ cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtrmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const 
double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZtrmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const half* a_buffer, const size_t a_offset, const size_t a_ld, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1148,17 +1244,18 @@ cublasStatus_t cublasXtrmv(const cublas_has_no_layout layout, const cublas // Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV template -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t 
a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStbmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1166,12 +1263,13 @@ cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtbmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1179,33 +1277,35 @@ cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float2* 
a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtbmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZtbmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const half* a_buffer, const 
size_t a_offset, const size_t a_ld, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1214,17 +1314,18 @@ cublasStatus_t cublasXtbmv(const cublas_has_no_layout layout, const cublas // Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV template -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* ap_buffer, const size_t ap_offset, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* ap_buffer, const size_t ap_offset, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStpmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); @@ -1232,12 +1333,13 @@ cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* ap_buffer, const size_t ap_offset, double* 
x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtpmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); @@ -1245,33 +1347,35 @@ cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* ap_buffer, const size_t ap_offset, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtpmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* ap_buffer, const size_t ap_offset, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = 
cublasZtpmv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const half* ap_buffer, const size_t ap_offset, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1280,17 +1384,18 @@ cublasStatus_t cublasXtpmv(const cublas_has_no_layout layout, const cublas // Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV template -cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t 
handle; - auto status = cublasStrsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1298,12 +1403,13 @@ cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtrsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1311,45 +1417,48 @@ cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtrsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal, 
static_cast(n), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtrsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZtrsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV template -cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStbsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1357,12 +1466,13 @@ cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtbsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); @@ -1370,45 +1480,48 @@ cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, 
const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtbsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtbsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZtbsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), - &a_buffer[a_offset], a_ld, - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV template -cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t 
diagonal, const size_t n, const T* ap_buffer, const size_t ap_offset, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* ap_buffer, const size_t ap_offset, float* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStpsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasStpsv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); @@ -1416,12 +1529,13 @@ cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cubla return status; } template <> -cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* ap_buffer, const size_t ap_offset, double* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtpsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasDtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); @@ -1429,42 +1543,44 @@ cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cubl return status; } template <> -cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, 
const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* ap_buffer, const size_t ap_offset, float2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCtpsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasCtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } template <> -cublasStatus_t cublasXtpsv(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* ap_buffer, const size_t ap_offset, double2* x_buffer, const size_t x_offset, const size_t x_inc) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZtpsv(handle, layout, triangle, a_transpose, diagonal, + auto status = cublasZtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), - &ap_buffer[ap_offset], - &x_buffer[x_offset], static_cast(x_inc)); + reinterpret_cast(&ap_buffer[ap_offset]), + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for SGER/DGER -cublasStatus_t cublasXger(const cublas_has_no_layout layout, +cublasStatus_t cublasXger(const Layout layout, const size_t m, const size_t n, const float alpha, 
const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSger(handle, layout, - static_cast(m), static_cast(n), + auto status = cublasSger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1472,15 +1588,15 @@ cublasStatus_t cublasXger(const cublas_has_no_layout layout, cublasDestroy(handle); return status; } -cublasStatus_t cublasXger(const cublas_has_no_layout layout, +cublasStatus_t cublasXger(const Layout layout, const size_t m, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDger(handle, layout, - static_cast(m), static_cast(n), + auto status = cublasDger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -1488,7 +1604,7 @@ cublasStatus_t cublasXger(const cublas_has_no_layout layout, cublasDestroy(handle); return status; } -cublasStatus_t cublasXger(const cublas_has_no_layout layout, +cublasStatus_t cublasXger(const Layout layout, const size_t m, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1498,209 +1614,242 @@ cublasStatus_t cublasXger(const cublas_has_no_layout layout, } // Forwards the cuBLAS calls for CGERU/ZGERU -cublasStatus_t cublasXgeru(const cublas_has_no_layout layout, +cublasStatus_t cublasXgeru(const Layout layout, const size_t m, const size_t n, const float2 alpha, const float2* 
x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc, float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasCgeru(handle, layout, - static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + auto status = cublasCgeru(handle, static_cast(m), static_cast(n), + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgeru(const cublas_has_no_layout layout, +cublasStatus_t cublasXgeru(const Layout layout, const size_t m, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc, double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZgeru(handle, layout, - static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + auto status = cublasZgeru(handle, static_cast(m), static_cast(n), + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } // Forwards the 
cuBLAS calls for CGERC/ZGERC -cublasStatus_t cublasXgerc(const cublas_has_no_layout layout, +cublasStatus_t cublasXgerc(const Layout layout, const size_t m, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc, float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasCgerc(handle, layout, - static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + auto status = cublasCgerc(handle, static_cast(m), static_cast(n), + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgerc(const cublas_has_no_layout layout, +cublasStatus_t cublasXgerc(const Layout layout, const size_t m, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc, double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZgerc(handle, layout, - static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + auto status = cublasZgerc(handle, static_cast(m), static_cast(n), + &alpha_cuda, + 
reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHER/ZHER -cublasStatus_t cublasXher(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCher(handle, layout, triangle, + auto status = cublasCher(handle, triangle, static_cast(n), &alpha, - &x_buffer[x_offset], static_cast(x_inc), - &a_buffer[a_offset], a_ld); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXher(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZher(handle, layout, triangle, + auto status = cublasZher(handle, triangle, static_cast(n), &alpha, - &x_buffer[x_offset], static_cast(x_inc), - &a_buffer[a_offset], a_ld); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHPR/ZHPR -cublasStatus_t cublasXhpr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr(const Layout 
layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasChpr(handle, layout, triangle, + auto status = cublasChpr(handle, triangle, static_cast(n), &alpha, - &x_buffer[x_offset], static_cast(x_inc), - &ap_buffer[ap_offset]); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&ap_buffer[ap_offset])); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhpr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZhpr(handle, layout, triangle, + auto status = cublasZhpr(handle, triangle, static_cast(n), &alpha, - &x_buffer[x_offset], static_cast(x_inc), - &ap_buffer[ap_offset]); + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&ap_buffer[ap_offset])); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHER2/ZHER2 -cublasStatus_t cublasXher2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc, float2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); 
cublasHandle_t handle; - auto status = cublasCher2(handle, layout, triangle, + auto status = cublasCher2(handle, triangle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXher2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc, double2* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZher2(handle, layout, triangle, + auto status = cublasZher2(handle, triangle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &a_buffer[a_offset], a_ld); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&a_buffer[a_offset]), a_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for CHPR2/ZHPR2 -cublasStatus_t cublasXhpr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t 
y_offset, const size_t y_inc, float2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasChpr2(handle, layout, triangle, + auto status = cublasChpr2(handle, triangle, static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &ap_buffer[ap_offset]); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&ap_buffer[ap_offset])); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhpr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc, double2* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZhpr2(handle, layout, triangle, + auto status = cublasZhpr2(handle, triangle, static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &x_buffer[x_offset], static_cast(x_inc), - &y_buffer[y_offset], static_cast(y_inc), - &ap_buffer[ap_offset]); + &alpha_cuda, + reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), + reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), + reinterpret_cast(&ap_buffer[ap_offset])); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for SSYR/DSYR -cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t 
cublasXsyr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsyr(handle, layout, triangle, + auto status = cublasSsyr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1708,13 +1857,14 @@ cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMod cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsyr(handle, layout, triangle, + auto status = cublasDsyr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1722,7 +1872,7 @@ cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMod cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1731,13 +1881,14 @@ cublasStatus_t cublasXsyr(const cublas_has_no_layout layout, const cublasFillMod } // Forwards the cuBLAS calls for SSPR/DSPR -cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, const size_t n, 
const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSspr(handle, layout, triangle, + auto status = cublasSspr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1745,13 +1896,14 @@ cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMod cublasDestroy(handle); return status; } -cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDspr(handle, layout, triangle, + auto status = cublasDspr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1759,7 +1911,7 @@ cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMod cublasDestroy(handle); return status; } -cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1768,14 +1920,15 @@ cublasStatus_t cublasXspr(const cublas_has_no_layout layout, const cublasFillMod } // Forwards the cuBLAS calls for SSYR2/DSYR2 -cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const 
size_t y_offset, const size_t y_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsyr2(handle, layout, triangle, + auto status = cublasSsyr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1784,14 +1937,15 @@ cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsyr2(handle, layout, triangle, + auto status = cublasDsyr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1800,7 +1954,7 @@ cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1810,14 +1964,15 @@ cublasStatus_t cublasXsyr2(const cublas_has_no_layout layout, const cublasFillMo } // Forwards the cuBLAS calls for SSPR2/DSPR2 -cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t 
x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc, float* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSspr2(handle, layout, triangle, + auto status = cublasSspr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1826,14 +1981,15 @@ cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* ap_buffer, const size_t ap_offset) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDspr2(handle, layout, triangle, + auto status = cublasDspr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1842,7 +1998,7 @@ cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1856,15 +2012,16 @@ cublasStatus_t cublasXspr2(const cublas_has_no_layout layout, const cublasFillMo // ================================================================================================= // Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM -cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const 
cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* b_buffer, const size_t b_offset, const size_t b_ld, const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSgemm(handle, layout, a_transpose, b_transpose, + auto status = cublasSgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -1874,15 +2031,16 @@ cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* b_buffer, const size_t b_offset, const size_t b_ld, const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDgemm(handle, layout, a_transpose, b_transpose, + auto status = cublasDgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -1892,43 +2050,57 @@ cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperat cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const 
cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasCgemm(handle, layout, a_transpose, b_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_float2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasZgemm(handle, layout, a_transpose, b_transpose, + if (layout == Layout::kRowMajor) { return 
CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_double2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1939,15 +2111,16 @@ cublasStatus_t cublasXgemm(const cublas_has_no_layout layout, const cublasOperat } // Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM -cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float* b_buffer, const size_t b_offset, const size_t b_ld, const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsymm(handle, layout, side, triangle, + auto status = cublasSsymm(handle, side, triangle, 
static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -1957,15 +2130,16 @@ cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* b_buffer, const size_t b_offset, const size_t b_ld, const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsymm(handle, layout, side, triangle, + auto status = cublasDsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -1975,43 +2149,57 @@ cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasCsymm(handle, layout, side, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto 
status = cublasCsymm(handle, side, triangle, static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_float2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasZsymm(handle, layout, side, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsymm(handle, side, triangle, static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_double2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t 
side, const cublasFillMode_t triangle, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2022,52 +2210,67 @@ cublasStatus_t cublasXsymm(const cublas_has_no_layout layout, const cublasSideMo } // Forwards the cuBLAS calls for CHEMM/ZHEMM -cublasStatus_t cublasXhemm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* b_buffer, const size_t b_offset, const size_t b_ld, const float2 beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasChemm(handle, layout, side, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasChemm(handle, side, triangle, static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_float2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXhemm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* 
b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasZhemm(handle, layout, side, triangle, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZhemm(handle, side, triangle, static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_double2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK -cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsyrk(handle, layout, triangle, a_transpose, + auto status = cublasSsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -2076,14 +2279,15 @@ cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t 
triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsyrk(handle, layout, triangle, a_transpose, + auto status = cublasDsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -2092,39 +2296,53 @@ cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2 beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasCsyrk(handle, layout, triangle, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &cl_float2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); 
return status; } -cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2 beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasZsyrk(handle, layout, triangle, a_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &cl_double2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2134,49 +2352,52 @@ cublasStatus_t cublasXsyrk(const cublas_has_no_layout layout, const cublasFillMo } // Forwards the cuBLAS calls for CHERK/ZHERK -cublasStatus_t cublasXherk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, const size_t n, const size_t k, const float alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasCherk(handle, layout, triangle, a_transpose, + auto status = cublasCherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, - &a_buffer[a_offset], a_ld, + reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta, - &c_buffer[c_offset], c_ld); + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXherk(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasZherk(handle, layout, triangle, a_transpose, + auto status = cublasZherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, - &a_buffer[a_offset], a_ld, + reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta, - &c_buffer[c_offset], c_ld); + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K -cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float alpha, const float* 
a_buffer, const size_t a_offset, const size_t a_ld, const float* b_buffer, const size_t b_offset, const size_t b_ld, const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasSsyr2k(handle, layout, triangle, ab_transpose, + auto status = cublasSsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -2186,15 +2407,16 @@ cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillM cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double* b_buffer, const size_t b_offset, const size_t b_ld, const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDsyr2k(handle, layout, triangle, ab_transpose, + auto status = cublasDsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, @@ -2204,43 +2426,57 @@ cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillM cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* b_buffer, 
const size_t b_offset, const size_t b_ld, const float2 beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasCsyr2k(handle, layout, triangle, ab_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasCsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_float2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* b_buffer, const size_t b_offset, const size_t b_ld, const double2 beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { - cublasHandle_t handle; - auto status = cublasZsyr2k(handle, layout, triangle, ab_transpose, + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); + cuDoubleComplex beta_cuda; + beta_cuda.x = beta.real(); + beta_cuda.y = beta.imag(); + cublasHandle_t handle; + auto status = cublasZsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), - &cl_double2{{alpha.real(), alpha.imag()}}, 
- &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, - &cl_double2{{beta.real(), beta.imag()}}, - &c_buffer[c_offset], c_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, + &beta_cuda, + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2251,51 +2487,60 @@ cublasStatus_t cublasXsyr2k(const cublas_has_no_layout layout, const cublasFillM } // Forwards the cuBLAS calls for CHER2K/ZHER2K -cublasStatus_t cublasXher2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float2* b_buffer, const size_t b_offset, const size_t b_ld, const float beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasCher2k(handle, layout, triangle, ab_transpose, + auto status = cublasCher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta, - &c_buffer[c_offset], c_ld); + 
reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXher2k(const cublas_has_no_layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, const double2* b_buffer, const size_t b_offset, const size_t b_ld, const double beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZher2k(handle, layout, triangle, ab_transpose, + auto status = cublasZher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld, + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta, - &c_buffer[c_offset], c_ld); + reinterpret_cast(&c_buffer[c_offset]), c_ld); cublasDestroy(handle); return status; } // Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM -cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t 
handle; - auto status = cublasStrmm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasStrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -2303,13 +2548,14 @@ cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtrmm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasDtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -2317,35 +2563,43 @@ cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { 
return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasCtrmm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasCtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZtrmm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasZtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t 
cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2354,13 +2608,14 @@ cublasStatus_t cublasXtrmm(const cublas_has_no_layout layout, const cublasSideMo } // Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM -cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasStrsm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasStrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -2368,13 +2623,14 @@ cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* b_buffer, const size_t b_offset, 
const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; - auto status = cublasDtrsm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasDtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, @@ -2382,31 +2638,39 @@ cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMo cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasCtrsm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasCtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), - &cl_float2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; } -cublasStatus_t cublasXtrsm(const cublas_has_no_layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* b_buffer, const size_t b_offset, const size_t b_ld) { + if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } + cuDoubleComplex alpha_cuda; + alpha_cuda.x = alpha.real(); + alpha_cuda.y = alpha.imag(); cublasHandle_t handle; - auto status = cublasZtrsm(handle, layout, side, triangle, a_transpose, diagonal, + auto status = cublasZtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), - &cl_double2{{alpha.real(), alpha.imag()}}, - &a_buffer[a_offset], a_ld, - &b_buffer[b_offset], b_ld); + &alpha_cuda, + reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; } -- cgit v1.2.3 From 6b625f8915ce0596d65187bd3a8eb47e91a0084e Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Mon, 10 Apr 2017 22:54:14 +0200 Subject: Added reference implementations for performance-testing against cuBLAS --- test/correctness/tester.cpp | 12 +++++++++--- test/routines/common.hpp | 3 ++- test/routines/level1/xamax.hpp | 10 ++++++++++ test/routines/level1/xasum.hpp | 10 ++++++++++ test/routines/level1/xaxpy.hpp | 10 ++++++++++ test/routines/level1/xcopy.hpp | 10 ++++++++++ test/routines/level1/xdot.hpp | 11 +++++++++++ test/routines/level1/xdotc.hpp | 11 +++++++++++ test/routines/level1/xdotu.hpp | 11 +++++++++++ test/routines/level1/xnrm2.hpp | 10 ++++++++++ test/routines/level1/xscal.hpp | 9 +++++++++ test/routines/level1/xswap.hpp | 10 ++++++++++ test/routines/level2/xgbmv.hpp | 13 +++++++++++++ test/routines/level2/xgemv.hpp | 13 +++++++++++++ test/routines/level2/xger.hpp | 12 ++++++++++++ test/routines/level2/xgerc.hpp | 12 ++++++++++++ test/routines/level2/xgeru.hpp | 12 ++++++++++++ test/routines/level2/xhbmv.hpp | 13 +++++++++++++ test/routines/level2/xhemv.hpp | 13 
+++++++++++++ test/routines/level2/xher.hpp | 12 ++++++++++++ test/routines/level2/xher2.hpp | 13 +++++++++++++ test/routines/level2/xhpmv.hpp | 13 +++++++++++++ test/routines/level2/xhpr.hpp | 12 ++++++++++++ test/routines/level2/xhpr2.hpp | 13 +++++++++++++ test/routines/level2/xsbmv.hpp | 13 +++++++++++++ test/routines/level2/xspmv.hpp | 13 +++++++++++++ test/routines/level2/xspr.hpp | 12 ++++++++++++ test/routines/level2/xspr2.hpp | 13 +++++++++++++ test/routines/level2/xsymv.hpp | 13 +++++++++++++ test/routines/level2/xsyr.hpp | 12 ++++++++++++ test/routines/level2/xsyr2.hpp | 13 +++++++++++++ test/routines/level2/xtbmv.hpp | 14 ++++++++++++++ test/routines/level2/xtpmv.hpp | 14 ++++++++++++++ test/routines/level2/xtrmv.hpp | 14 ++++++++++++++ test/routines/level2/xtrsv.hpp | 14 ++++++++++++++ test/routines/level3/xgemm.hpp | 14 ++++++++++++++ test/routines/level3/xhemm.hpp | 14 ++++++++++++++ test/routines/level3/xher2k.hpp | 14 ++++++++++++++ test/routines/level3/xherk.hpp | 13 +++++++++++++ test/routines/level3/xsymm.hpp | 14 ++++++++++++++ test/routines/level3/xsyr2k.hpp | 14 ++++++++++++++ test/routines/level3/xsyrk.hpp | 13 +++++++++++++ test/routines/level3/xtrmm.hpp | 15 +++++++++++++++ test/routines/level3/xtrsm.hpp | 15 +++++++++++++++ test/routines/levelx/xaxpybatched.hpp | 13 +++++++++++++ test/routines/levelx/xgemmbatched.hpp | 17 +++++++++++++++++ test/routines/levelx/xinvert.hpp | 3 +++ test/routines/levelx/xomatcopy.hpp | 3 +++ 48 files changed, 571 insertions(+), 4 deletions(-) (limited to 'test') diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp index b352c1aa..d1f3cbb2 100644 --- a/test/correctness/tester.cpp +++ b/test/correctness/tester.cpp @@ -117,9 +117,15 @@ Tester::Tester(const std::vector &arguments, const bool silent options_ = options; // Determines which reference is the default - auto default_clblas = 0; - auto default_cblas = 0; - auto default_cublas = 0; + #if defined(CLBLAST_REF_CBLAS) + auto default_cblas 
= 0; + #endif + #if defined(CLBLAST_REF_CLBLAS) + auto default_clblas = 0; + #endif + #if defined(CLBLAST_REF_CUBLAS) + auto default_cublas = 0; + #endif #if defined(CLBLAST_REF_CBLAS) default_cblas = 1; #elif defined(CLBLAST_REF_CLBLAS) diff --git a/test/routines/common.hpp b/test/routines/common.hpp index 1abf5528..9708288a 100644 --- a/test/routines/common.hpp +++ b/test/routines/common.hpp @@ -25,8 +25,9 @@ #ifdef CLBLAST_REF_CBLAS #include "test/wrapper_cblas.hpp" #endif +#include "test/wrapper_cuda.hpp" #ifdef CLBLAST_REF_CUBLAS - #include "test/wrapper_cuda.hpp" + #include "test/wrapper_cublas.hpp" #endif // ================================================================================================= diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp index fccefc73..dcd48a47 100644 --- a/test/routines/level1/xamax.hpp +++ b/test/routines/level1/xamax.hpp @@ -103,6 +103,16 @@ class TestXamax { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXamax(args.n, + buffers.scalar, args.imax_offset, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp index f0fca4d3..e7e41fe5 100644 --- a/test/routines/level1/xasum.hpp +++ b/test/routines/level1/xasum.hpp @@ -103,6 +103,16 @@ class TestXasum { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static 
StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXasum(args.n, + buffers.scalar, args.asum_offset, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index 8426d739..98f0e380 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -104,6 +104,16 @@ class TestXaxpy { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXaxpy(args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp index d1e7f49e..65e498ee 100644 --- a/test/routines/level1/xcopy.hpp +++ b/test/routines/level1/xcopy.hpp @@ -103,6 +103,16 @@ class TestXcopy { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXcopy(args.n, + buffers.x_vec, 
args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp index cb3d7979..c95b16ef 100644 --- a/test/routines/level1/xdot.hpp +++ b/test/routines/level1/xdot.hpp @@ -110,6 +110,17 @@ class TestXdot { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXdot(args.n, + buffers.scalar, args.dot_offset, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp index 10ecbda6..0c99be25 100644 --- a/test/routines/level1/xdotc.hpp +++ b/test/routines/level1/xdotc.hpp @@ -110,6 +110,17 @@ class TestXdotc { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXdotc(args.n, + buffers.scalar, args.dot_offset, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status 
== CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp index 6efd270e..bf6bcd80 100644 --- a/test/routines/level1/xdotu.hpp +++ b/test/routines/level1/xdotu.hpp @@ -110,6 +110,17 @@ class TestXdotu { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXdotu(args.n, + buffers.scalar, args.dot_offset, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp index 0ba24b13..096604d1 100644 --- a/test/routines/level1/xnrm2.hpp +++ b/test/routines/level1/xnrm2.hpp @@ -103,6 +103,16 @@ class TestXnrm2 { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXnrm2(args.n, + buffers.scalar, args.nrm2_offset, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + 
// Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.scalar_size, static_cast(0)); diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp index e7db434e..09b53839 100644 --- a/test/routines/level1/xscal.hpp +++ b/test/routines/level1/xscal.hpp @@ -97,6 +97,15 @@ class TestXscal { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXscal(args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.x_size, static_cast(0)); diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp index 64feb744..0d6fe451 100644 --- a/test/routines/level1/xswap.hpp +++ b/test/routines/level1/xswap.hpp @@ -103,6 +103,16 @@ class TestXswap { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXswap(args.n, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) 
{ std::vector result(args.x_size + args.y_size, static_cast(0)); diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index fb36d7f2..77abcfff 100644 --- a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -123,6 +123,19 @@ class TestXgbmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXgbmv(args.layout, + convertToCUBLAS(args.a_transpose), + args.m, args.n, args.kl, args.ku, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index 4654838e..c0c59152 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -123,6 +123,19 @@ class TestXgemv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXgemv(args.layout, + convertToCUBLAS(args.a_transpose), + args.m, args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the 
computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 9d1dec13..7fe37e1a 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -117,6 +117,18 @@ class TestXger { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXger(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index efa72744..b50cf672 100644 --- a/test/routines/level2/xgerc.hpp +++ b/test/routines/level2/xgerc.hpp @@ -117,6 +117,18 @@ class TestXgerc { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXgerc(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the 
computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index cb14636e..1ba83107 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -117,6 +117,18 @@ class TestXgeru { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXgeru(args.layout, + args.m, args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index f41cc572..2faf86d9 100644 --- a/test/routines/level2/xhbmv.hpp +++ b/test/routines/level2/xhbmv.hpp @@ -117,6 +117,19 @@ class TestXhbmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhbmv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.kl, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + 
// Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index 9f5aca00..b2b6b337 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -117,6 +117,19 @@ class TestXhemv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhemv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index ef0afd1c..980e8d8b 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -110,6 +110,18 @@ class TestXher { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXher(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } 
+ } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index d4b06c49..e60486a8 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -117,6 +117,19 @@ class TestXher2 { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXher2(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index 52f70dc9..40ec5475 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -117,6 +117,19 @@ class TestXhpmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhpmv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.ap_mat, args.ap_offset, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return 
StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 39112e49..8148c253 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -110,6 +110,18 @@ class TestXhpr { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhpr(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.ap_mat, args.ap_offset); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.ap_size, static_cast(0)); diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index 21f0970a..651989a4 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -117,6 +117,19 @@ class TestXhpr2 { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhpr2(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.ap_mat, args.ap_offset); + if (status == CUBLAS_STATUS_SUCCESS) { return 
StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.ap_size, static_cast(0)); diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index 94e60dd2..efcdbe34 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -117,6 +117,19 @@ class TestXsbmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsbmv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.kl, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index 02bfd4e3..c7d3d348 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -117,6 +117,19 @@ class TestXspmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXspmv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.ap_mat, args.ap_offset, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + 
buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 9d992eb2..8d50074c 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -110,6 +110,18 @@ class TestXspr { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXspr(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.ap_mat, args.ap_offset); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.ap_size, static_cast(0)); diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index 520bf412..2ee9538a 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -117,6 +117,19 @@ class TestXspr2 { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXspr2(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + 
buffers.ap_mat, args.ap_offset); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.ap_size, static_cast(0)); diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index 130fee49..9411fa8d 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -117,6 +117,19 @@ class TestXsymv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsymv(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, args.beta, + buffers.y_vec, args.y_offset, args.y_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index 2eb07f9b..8c62f586 100644 --- a/test/routines/level2/xsyr.hpp +++ b/test/routines/level2/xsyr.hpp @@ -110,6 +110,18 @@ class TestXsyr { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsyr(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, 
args.x_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index 5c3598c5..80838174 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -117,6 +117,19 @@ class TestXsyr2 { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsyr2(args.layout, + convertToCUBLAS(args.triangle), + args.n, args.alpha, + buffers.x_vec, args.x_offset, args.x_inc, + buffers.y_vec, args.y_offset, args.y_inc, + buffers.a_mat, args.a_offset, args.a_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.a_size, static_cast(0)); diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index 7ef67424..9aff2cea 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -113,6 +113,20 @@ class TestXtbmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXtbmv(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + 
convertToCUBLAS(args.diagonal), + args.n, args.kl, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.x_size, static_cast(0)); diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index 6cea7061..e950b892 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -113,6 +113,20 @@ class TestXtpmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXtpmv(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.diagonal), + args.n, + buffers.ap_mat, args.ap_offset, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.x_size, static_cast(0)); diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index 7c97c966..a773b1ca 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -113,6 +113,20 @@ class TestXtrmv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = 
cublasXtrmv(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.diagonal), + args.n, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.x_size, static_cast(0)); diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 18a3cef5..4428271a 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -128,6 +128,20 @@ class TestXtrsv { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXtrsv(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.diagonal), + args.n, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.x_size, static_cast(0)); diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index d6ad98f9..36fa2f43 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -127,6 +127,20 @@ class TestXgemm { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode 
RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXgemm(args.layout, + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.b_transpose), + args.m, args.n, args.k, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index beadf62d..9400a1fc 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -127,6 +127,20 @@ class TestXhemm { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXhemm(args.layout, + convertToCUBLAS(args.side), + convertToCUBLAS(args.triangle), + args.m, args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index b5d22579..b341c4d7 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp @@ -128,6 
+128,20 @@ class TestXher2k { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXher2k(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + args.n, args.k, alpha2, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index 558f4e76..676d5286 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -115,6 +115,19 @@ class TestXherk { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXherk(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xsymm.hpp 
b/test/routines/level3/xsymm.hpp index 704a8f9e..4888091b 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -127,6 +127,20 @@ class TestXsymm { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsymm(args.layout, + convertToCUBLAS(args.side), + convertToCUBLAS(args.triangle), + args.m, args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index c321b9cf..2fc4090c 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -125,6 +125,20 @@ class TestXsyr2k { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsyr2k(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector 
DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 00a3013d..2ee24a2e 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -115,6 +115,19 @@ class TestXsyrk { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXsyrk(args.layout, + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + args.n, args.k, args.alpha, + buffer.a_mat, args.a_offset, args.a_ld, args.beta, + buffer.c_mat, args.c_offset, args.c_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index 660001df..abf77db9 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -119,6 +119,21 @@ class TestXtrmm { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXtrmm(args.layout, + convertToCUBLAS(args.side), + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.diagonal), + args.m, args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } 
+ } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.b_size, static_cast(0)); diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index 9e8b9565..10b216cc 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -130,6 +130,21 @@ class TestXtrsm { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto status = cublasXtrsm(args.layout, + convertToCUBLAS(args.side), + convertToCUBLAS(args.triangle), + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.diagonal), + args.m, args.n, args.alpha, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat, args.b_offset, args.b_ld); + if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.b_size, static_cast(0)); diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp index d8b3837c..add6c1e1 100644 --- a/test/routines/levelx/xaxpybatched.hpp +++ b/test/routines/levelx/xaxpybatched.hpp @@ -125,6 +125,19 @@ class TestXaxpyBatched { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto status = cublasXaxpy(args.n, args.alphas[batch], + buffers.x_vec, args.x_offsets[batch], args.x_inc, + 
buffers.y_vec, args.y_offsets[batch], args.y_inc); + if (status != CUBLAS_STATUS_SUCCESS) { return StatusCode::kUnknownError; } + } + return StatusCode::kSuccess; + } + #endif + // Describes how to download the results of the computation static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.y_size, static_cast(0)); diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp index e13e9382..ae8630c0 100644 --- a/test/routines/levelx/xgemmbatched.hpp +++ b/test/routines/levelx/xgemmbatched.hpp @@ -160,6 +160,23 @@ class TestXgemmBatched { } #endif + // Describes how to run the cuBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CUBLAS + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { + auto status = cublasXgemm(args.layout, + convertToCUBLAS(args.a_transpose), + convertToCUBLAS(args.b_transpose), + args.m, args.n, args.k, args.alphas[batch], + buffers.a_mat, args.a_offsets[batch], args.a_ld, + buffers.b_mat, args.b_offsets[batch], args.b_ld, args.betas[batch], + buffers.c_mat, args.c_offsets[batch], args.c_ld); + if (status != CUBLAS_STATUS_SUCCESS) { return StatusCode::kUnknownError; } + } + return StatusCode::kSuccess; + } + #endif + // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { std::vector result(args.c_size, static_cast(0)); diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index ffb484b0..ac9e9a2d 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -192,6 +192,9 @@ class TestXinvert { static StatusCode RunReference2(const Arguments &args, BuffersHost &buffers_host, Queue&) { return RunReference(args, buffers_host); } + static StatusCode 
RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + return StatusCode::kUnknownError; + } // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp index d5973b4c..2e77e6f7 100644 --- a/test/routines/levelx/xomatcopy.hpp +++ b/test/routines/levelx/xomatcopy.hpp @@ -151,6 +151,9 @@ class TestXomatcopy { static StatusCode RunReference2(const Arguments &args, BuffersHost &buffers_host, Queue&) { return RunReference(args, buffers_host); } + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + return StatusCode::kUnknownError; + } // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { -- cgit v1.2.3 From f24c142948fc71d8b37826c1275259668fe0d0e5 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Tue, 11 Apr 2017 21:50:18 +0200 Subject: Made compilation of the cuBLAS wrapper work properly --- scripts/generator/generator/cpp.py | 1 + scripts/generator/generator/routine.py | 12 +- test/correctness/misc/override_parameters.cpp | 6 +- test/routines/level2/xher.hpp | 2 +- test/routines/level2/xhpr.hpp | 2 +- test/routines/level3/xher2k.hpp | 3 +- test/routines/level3/xherk.hpp | 2 +- test/routines/level3/xsyrk.hpp | 4 +- test/routines/levelx/xinvert.hpp | 5 +- test/routines/levelx/xomatcopy.hpp | 3 +- test/wrapper_cublas.hpp | 158 ++++++++++++++++++++++++-- 11 files changed, 171 insertions(+), 27 deletions(-) (limited to 'test') diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 7c695dc8..79d6b2a1 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -319,6 +319,7 @@ def wrapper_cublas(routine): # Calls the 
cuBLAS routine result += " cublasHandle_t handle;" + NL + result += " if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; }" + NL result += " auto status = cublas" + flavour.name_cublas() + routine.name + "(handle, " result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL result += " cublasDestroy(handle);" + NL diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index b1db484f..a7abfde5 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -347,7 +347,12 @@ class Routine: """As above but for cuBLAS the wrapper""" prefix = "const " if name in self.inputs else "" if name in self.inputs or name in self.outputs: - if flavour.precision_name in ["C", "Z"]: + if name in self.index_buffers(): + a = ["reinterpret_cast(&" + name + "_buffer[" + name + "_offset])"] + elif name in self.outputs and flavour.name in ["Sc", "Dz"]: + dtype = "float" if flavour.name == "Sc" else "double" + a = ["reinterpret_cast<" + dtype + "*>(&" + name + "_buffer[" + name + "_offset])"] + elif flavour.precision_name in ["C", "Z"]: cuda_complex = "cuDoubleComplex" if flavour.precision_name == "Z" else "cuComplex" a = ["reinterpret_cast<" + prefix + cuda_complex + "*>" + "(&" + name + "_buffer[" + name + "_offset])"] @@ -358,7 +363,10 @@ class Routine: c = ["static_cast(" + name + "_" + self.postfix(name) + ")"] elif name in ["a", "b", "c"]: c = [name + "_" + self.postfix(name)] - return [", ".join(a + c)] + result = [", ".join(a + c)] + if self.name == "trmm" and name == "a": + result *= 2 + return result return [] def buffer_type(self, name): diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp index e6eebef7..4283c039 100644 --- a/test/correctness/misc/override_parameters.cpp +++ b/test/correctness/misc/override_parameters.cpp @@ -129,15 +129,11 @@ size_t RunOverrideTests(int argc, char *argv[], const 
bool silent, const std::st // ================================================================================================= } // namespace clblast -// Shortcuts to the clblast namespace -using float2 = clblast::float2; -using double2 = clblast::double2; - // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { auto errors = size_t{0}; errors += clblast::RunOverrideTests(argc, argv, false, "SGEMM"); - errors += clblast::RunOverrideTests(argc, argv, true, "CGEMM"); + errors += clblast::RunOverrideTests(argc, argv, true, "CGEMM"); if (errors > 0) { return 1; } else { return 0; } } diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index 980e8d8b..c313d0f5 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -112,7 +112,7 @@ class TestXher { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS - static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { auto status = cublasXher(args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 8148c253..986059bd 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -112,7 +112,7 @@ class TestXhpr { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS - static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { auto status = cublasXhpr(args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index b341c4d7..2b0fff64 100644 --- a/test/routines/level3/xher2k.hpp +++ b/test/routines/level3/xher2k.hpp 
@@ -130,7 +130,8 @@ class TestXher2k { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS - static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + auto alpha2 = T{args.alpha, args.alpha}; auto status = cublasXher2k(args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index 676d5286..3a676f59 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -117,7 +117,7 @@ class TestXherk { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS - static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { + static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { auto status = cublasXherk(args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 2ee24a2e..34f8f41a 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -122,8 +122,8 @@ class TestXsyrk { convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), args.n, args.k, args.alpha, - buffer.a_mat, args.a_offset, args.a_ld, args.beta, - buffer.c_mat, args.c_offset, args.c_ld); + buffers.a_mat, args.a_offset, args.a_ld, args.beta, + buffers.c_mat, args.c_offset, args.c_ld); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } } #endif diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index ac9e9a2d..cc02a88b 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -16,10 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XINVERT_H_ #define 
CLBLAST_TEST_ROUTINES_XINVERT_H_ -#include -#include - -#include "utilities/utilities.hpp" +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp index 2e77e6f7..bbf6006c 100644 --- a/test/routines/levelx/xomatcopy.hpp +++ b/test/routines/levelx/xomatcopy.hpp @@ -16,8 +16,7 @@ #ifndef CLBLAST_TEST_ROUTINES_XOMATCOPY_H_ #define CLBLAST_TEST_ROUTINES_XOMATCOPY_H_ -#include -#include +#include "test/routines/common.hpp" namespace clblast { // ================================================================================================= diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp index 22eb3971..4de8451a 100644 --- a/test/wrapper_cublas.hpp +++ b/test/wrapper_cublas.hpp @@ -44,6 +44,7 @@ cublasStatus_t cublasXrotg(float* sa_buffer, const size_t sa_offset, float* sc_buffer, const size_t sc_offset, float* ss_buffer, const size_t ss_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrotg(handle, &sa_buffer[sa_offset], &sb_buffer[sb_offset], &sc_buffer[sc_offset], @@ -57,6 +58,7 @@ cublasStatus_t cublasXrotg(double* sa_buffer, const size_t sa_offset, double* sc_buffer, const size_t sc_offset, double* ss_buffer, const size_t ss_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotg(handle, &sa_buffer[sa_offset], &sb_buffer[sb_offset], &sc_buffer[sc_offset], @@ -79,6 +81,7 @@ cublasStatus_t cublasXrotmg(float* sd1_buffer, const size_t sd1_offset, const float* sy1_buffer, const size_t sy1_offset, float* sparam_buffer, const size_t sparam_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto 
status = cublasSrotmg(handle, &sd1_buffer[sd1_offset], &sd2_buffer[sd2_offset], &sx1_buffer[sx1_offset], @@ -94,6 +97,7 @@ cublasStatus_t cublasXrotmg(double* sd1_buffer, const size_t sd1_offset, const double* sy1_buffer, const size_t sy1_offset, double* sparam_buffer, const size_t sparam_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotmg(handle, &sd1_buffer[sd1_offset], &sd2_buffer[sd2_offset], &sx1_buffer[sx1_offset], @@ -110,6 +114,7 @@ cublasStatus_t cublasXrot(const size_t n, const float cos, const float sin) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -124,6 +129,7 @@ cublasStatus_t cublasXrot(const size_t n, const double cos, const double sin) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -145,6 +151,7 @@ cublasStatus_t cublasXrotm(const size_t n, float* y_buffer, const size_t y_offset, const size_t y_inc, float* sparam_buffer, const size_t sparam_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrotm(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -158,6 +165,7 @@ cublasStatus_t cublasXrotm(const size_t n, double* y_buffer, const size_t y_offset, const size_t y_inc, double* sparam_buffer, const size_t sparam_offset) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotm(handle, static_cast(n), 
&x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -176,6 +184,7 @@ cublasStatus_t cublasXswap(const size_t n, float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSswap(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -187,6 +196,7 @@ cublasStatus_t cublasXswap(const size_t n, double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDswap(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -198,6 +208,7 @@ cublasStatus_t cublasXswap(const size_t n, float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCswap(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -209,6 +220,7 @@ cublasStatus_t cublasXswap(const size_t n, double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZswap(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -227,6 +239,7 @@ cublasStatus_t cublasXscal(const size_t n, const float alpha, float* 
x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSscal(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc)); @@ -237,6 +250,7 @@ cublasStatus_t cublasXscal(const size_t n, const double alpha, double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDscal(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc)); @@ -250,6 +264,7 @@ cublasStatus_t cublasXscal(const size_t n, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCscal(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); @@ -263,6 +278,7 @@ cublasStatus_t cublasXscal(const size_t n, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZscal(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); @@ -285,6 +301,7 @@ cublasStatus_t cublasXcopy(const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScopy(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -296,6 +313,7 @@ cublasStatus_t cublasXcopy(const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) 
{ cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDcopy(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); @@ -307,6 +325,7 @@ cublasStatus_t cublasXcopy(const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCcopy(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -318,6 +337,7 @@ cublasStatus_t cublasXcopy(const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZcopy(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); @@ -337,6 +357,7 @@ cublasStatus_t cublasXaxpy(const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSaxpy(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -349,6 +370,7 @@ cublasStatus_t cublasXaxpy(const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDaxpy(handle, static_cast(n), &alpha, 
&x_buffer[x_offset], static_cast(x_inc), @@ -364,6 +386,7 @@ cublasStatus_t cublasXaxpy(const size_t n, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCaxpy(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -379,6 +402,7 @@ cublasStatus_t cublasXaxpy(const size_t n, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZaxpy(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -405,6 +429,7 @@ cublasStatus_t cublasXdot(const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSdot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -418,6 +443,7 @@ cublasStatus_t cublasXdot(const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDdot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), @@ -445,6 +471,7 @@ cublasStatus_t cublasXdotu(const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = 
cublasCdotu(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -458,6 +485,7 @@ cublasStatus_t cublasXdotu(const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZdotu(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -478,6 +506,7 @@ cublasStatus_t cublasXdotc(const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCdotc(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -491,6 +520,7 @@ cublasStatus_t cublasXdotc(const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZdotc(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), @@ -509,6 +539,7 @@ cublasStatus_t cublasXnrm2(const size_t n, float* nrm2_buffer, const size_t nrm2_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSnrm2(handle, static_cast(n), &x_buffer[x_offset], 
static_cast(x_inc), &nrm2_buffer[nrm2_offset]); @@ -520,6 +551,7 @@ cublasStatus_t cublasXnrm2(const size_t n, double* nrm2_buffer, const size_t nrm2_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDnrm2(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &nrm2_buffer[nrm2_offset]); @@ -531,9 +563,10 @@ cublasStatus_t cublasXnrm2(const size_t n, float2* nrm2_buffer, const size_t nrm2_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScnrm2(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&nrm2_buffer[nrm2_offset])); + reinterpret_cast(&nrm2_buffer[nrm2_offset])); cublasDestroy(handle); return status; } @@ -542,9 +575,10 @@ cublasStatus_t cublasXnrm2(const size_t n, double2* nrm2_buffer, const size_t nrm2_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDznrm2(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&nrm2_buffer[nrm2_offset])); + reinterpret_cast(&nrm2_buffer[nrm2_offset])); cublasDestroy(handle); return status; } @@ -565,6 +599,7 @@ cublasStatus_t cublasXasum(const size_t n, float* asum_buffer, const size_t asum_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSasum(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &asum_buffer[asum_offset]); @@ -576,6 +611,7 @@ 
cublasStatus_t cublasXasum(const size_t n, double* asum_buffer, const size_t asum_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDasum(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &asum_buffer[asum_offset]); @@ -587,9 +623,10 @@ cublasStatus_t cublasXasum(const size_t n, float2* asum_buffer, const size_t asum_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScasum(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&asum_buffer[asum_offset])); + reinterpret_cast(&asum_buffer[asum_offset])); cublasDestroy(handle); return status; } @@ -598,9 +635,10 @@ cublasStatus_t cublasXasum(const size_t n, double2* asum_buffer, const size_t asum_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDzasum(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&asum_buffer[asum_offset])); + reinterpret_cast(&asum_buffer[asum_offset])); cublasDestroy(handle); return status; } @@ -621,9 +659,10 @@ cublasStatus_t cublasXamax(const size_t n, float* imax_buffer, const size_t imax_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIsamax(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), - &imax_buffer[imax_offset]); + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ 
-632,9 +671,10 @@ cublasStatus_t cublasXamax(const size_t n, double* imax_buffer, const size_t imax_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIdamax(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), - &imax_buffer[imax_offset]); + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ -643,9 +683,10 @@ cublasStatus_t cublasXamax(const size_t n, float2* imax_buffer, const size_t imax_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIcamax(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&imax_buffer[imax_offset])); + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ -654,9 +695,10 @@ cublasStatus_t cublasXamax(const size_t n, double2* imax_buffer, const size_t imax_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIzamax(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), - reinterpret_cast(&imax_buffer[imax_offset])); + reinterpret_cast(&imax_buffer[imax_offset])); cublasDestroy(handle); return status; } @@ -681,6 +723,7 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgemv(handle, a_transpose, 
static_cast(m), static_cast(n), &alpha, @@ -700,6 +743,7 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha, @@ -725,6 +769,7 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha_cuda, @@ -750,6 +795,7 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha_cuda, @@ -780,6 +826,7 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, @@ -799,6 +846,7 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != 
CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, @@ -824,6 +872,7 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha_cuda, @@ -849,6 +898,7 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha_cuda, @@ -885,6 +935,7 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChemv(handle, triangle, static_cast(n), &alpha_cuda, @@ -910,6 +961,7 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhemv(handle, triangle, static_cast(n), &alpha_cuda, @@ -937,6 +989,7 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChbmv(handle, triangle, 
static_cast(n), static_cast(k), &alpha_cuda, @@ -962,6 +1015,7 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhbmv(handle, triangle, static_cast(n), static_cast(k), &alpha_cuda, @@ -989,6 +1043,7 @@ cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpmv(handle, triangle, static_cast(n), &alpha_cuda, @@ -1014,6 +1069,7 @@ cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpmv(handle, triangle, static_cast(n), &alpha_cuda, @@ -1035,6 +1091,7 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsymv(handle, triangle, static_cast(n), &alpha, @@ -1054,6 +1111,7 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsymv(handle, triangle, static_cast(n), &alpha, @@ -1084,6 +1142,7 @@ cublasStatus_t 
cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, @@ -1103,6 +1162,7 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, @@ -1133,6 +1193,7 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspmv(handle, triangle, static_cast(n), &alpha, @@ -1152,6 +1213,7 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspmv(handle, triangle, static_cast(n), &alpha, @@ -1185,6 +1247,7 @@ cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != 
CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, @@ -1199,6 +1262,7 @@ cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, @@ -1213,6 +1277,7 @@ cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1227,6 +1292,7 @@ cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1255,6 +1321,7 @@ cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStbmv(handle, triangle, a_transpose, 
diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, @@ -1269,6 +1336,7 @@ cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, @@ -1283,6 +1351,7 @@ cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1297,6 +1366,7 @@ cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1325,6 +1395,7 @@ cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], @@ -1339,6 
+1410,7 @@ cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], @@ -1353,6 +1425,7 @@ cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), @@ -1367,6 +1440,7 @@ cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), @@ -1395,6 +1469,7 @@ cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, @@ -1409,6 +1484,7 @@ cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const 
size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, @@ -1423,6 +1499,7 @@ cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1437,6 +1514,7 @@ cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1458,6 +1536,7 @@ cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, @@ -1472,6 +1551,7 @@ cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t 
handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, @@ -1486,6 +1566,7 @@ cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1500,6 +1581,7 @@ cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, @@ -1521,6 +1603,7 @@ cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t tr float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStpsv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], @@ -1535,6 +1618,7 @@ cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t t double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return 
CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], @@ -1549,6 +1633,7 @@ cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t t float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), @@ -1563,6 +1648,7 @@ cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), @@ -1580,6 +1666,7 @@ cublasStatus_t cublasXger(const Layout layout, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1596,6 +1683,7 @@ cublasStatus_t cublasXger(const Layout layout, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), @@ -1625,6 +1713,7 @@ cublasStatus_t 
cublasXgeru(const Layout layout, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgeru(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -1644,6 +1733,7 @@ cublasStatus_t cublasXgeru(const Layout layout, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgeru(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -1665,6 +1755,7 @@ cublasStatus_t cublasXgerc(const Layout layout, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgerc(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -1684,6 +1775,7 @@ cublasStatus_t cublasXgerc(const Layout layout, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgerc(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), @@ -1701,6 +1793,7 @@ cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, float2* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCher(handle, triangle, static_cast(n), &alpha, @@ -1716,6 +1809,7 @@ cublasStatus_t cublasXher(const Layout layout, 
const cublasFillMode_t triangle, double2* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher(handle, triangle, static_cast(n), &alpha, @@ -1733,6 +1827,7 @@ cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, float2* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpr(handle, triangle, static_cast(n), &alpha, @@ -1748,6 +1843,7 @@ cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, double2* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpr(handle, triangle, static_cast(n), &alpha, @@ -1769,6 +1865,7 @@ cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCher2(handle, triangle, static_cast(n), &alpha_cuda, @@ -1789,6 +1886,7 @@ cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher2(handle, triangle, static_cast(n), &alpha_cuda, @@ -1811,6 +1909,7 @@ cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, alpha_cuda.x = 
alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpr2(handle, triangle, static_cast(n), &alpha_cuda, @@ -1831,6 +1930,7 @@ cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpr2(handle, triangle, static_cast(n), &alpha_cuda, @@ -1849,6 +1949,7 @@ cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr(handle, triangle, static_cast(n), &alpha, @@ -1864,6 +1965,7 @@ cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr(handle, triangle, static_cast(n), &alpha, @@ -1888,6 +1990,7 @@ cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, float* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspr(handle, triangle, static_cast(n), &alpha, @@ -1903,6 +2006,7 @@ cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, double* ap_buffer, const size_t ap_offset) { if (layout == 
Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspr(handle, triangle, static_cast(n), &alpha, @@ -1928,6 +2032,7 @@ cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr2(handle, triangle, static_cast(n), &alpha, @@ -1945,6 +2050,7 @@ cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr2(handle, triangle, static_cast(n), &alpha, @@ -1972,6 +2078,7 @@ cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, float* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspr2(handle, triangle, static_cast(n), &alpha, @@ -1989,6 +2096,7 @@ cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, double* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspr2(handle, triangle, static_cast(n), &alpha, @@ -2021,6 +2129,7 @@ cublasStatus_t cublasXgemm(const Layout layout, const 
cublasOperation_t a_transp float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, @@ -2040,6 +2149,7 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, @@ -2065,6 +2175,7 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha_cuda, @@ -2090,6 +2201,7 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha_cuda, @@ -2120,6 +2232,7 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return 
CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha, @@ -2139,6 +2252,7 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha, @@ -2164,6 +2278,7 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2189,6 +2304,7 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2225,6 +2341,7 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChemm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2250,6 +2367,7 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhemm(handle, side, 
triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2270,6 +2388,7 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, @@ -2287,6 +2406,7 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, @@ -2310,6 +2430,7 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2333,6 +2454,7 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2360,6 +2482,7 @@ cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, float2* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if 
(cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, @@ -2377,6 +2500,7 @@ cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, double2* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, @@ -2397,6 +2521,7 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, @@ -2416,6 +2541,7 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, @@ -2441,6 +2567,7 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2466,6 +2593,7 @@ cublasStatus_t cublasXsyr2k(const Layout 
layout, const cublasFillMode_t triangle beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2499,6 +2627,7 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2521,6 +2650,7 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2540,10 +2670,12 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con float* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, + &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); cublasDestroy(handle); return status; @@ -2555,10 +2687,12 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con double* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { 
return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, + &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); cublasDestroy(handle); return status; @@ -2573,10 +2707,12 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; @@ -2591,10 +2727,12 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, + reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); cublasDestroy(handle); return status; @@ -2615,6 +2753,7 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con float* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, @@ -2630,6 +2769,7 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con 
double* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, @@ -2648,6 +2788,7 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, @@ -2666,6 +2807,7 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); cublasHandle_t handle; + if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, -- cgit v1.2.3 From f7f8ec644f51d16f888b6a7086009b79c0beef8f Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 13 Apr 2017 21:31:27 +0200 Subject: Fixed CUDA malloc and cuBLAS handles: cuBLAS as a performance-reference now works --- scripts/generator/generator/cpp.py | 11 +- scripts/generator/generator/routine.py | 2 +- src/utilities/utilities.hpp | 3 + test/performance/client.cpp | 5 +- test/routines/level1/xamax.hpp | 2 +- test/routines/level1/xasum.hpp | 2 +- test/routines/level1/xaxpy.hpp | 2 +- test/routines/level1/xcopy.hpp | 2 +- test/routines/level1/xdot.hpp | 2 +- test/routines/level1/xdotc.hpp | 2 +- test/routines/level1/xdotu.hpp | 2 +- test/routines/level1/xnrm2.hpp | 2 +- test/routines/level1/xscal.hpp | 2 +- test/routines/level1/xswap.hpp | 2 +- test/routines/level2/xgbmv.hpp | 2 +- 
test/routines/level2/xgemv.hpp | 2 +- test/routines/level2/xger.hpp | 2 +- test/routines/level2/xgerc.hpp | 2 +- test/routines/level2/xgeru.hpp | 2 +- test/routines/level2/xhbmv.hpp | 2 +- test/routines/level2/xhemv.hpp | 2 +- test/routines/level2/xher.hpp | 2 +- test/routines/level2/xher2.hpp | 2 +- test/routines/level2/xhpmv.hpp | 2 +- test/routines/level2/xhpr.hpp | 2 +- test/routines/level2/xhpr2.hpp | 2 +- test/routines/level2/xsbmv.hpp | 2 +- test/routines/level2/xspmv.hpp | 2 +- test/routines/level2/xspr.hpp | 2 +- test/routines/level2/xspr2.hpp | 2 +- test/routines/level2/xsymv.hpp | 2 +- test/routines/level2/xsyr.hpp | 2 +- test/routines/level2/xsyr2.hpp | 2 +- test/routines/level2/xtbmv.hpp | 2 +- test/routines/level2/xtpmv.hpp | 2 +- test/routines/level2/xtrmv.hpp | 2 +- test/routines/level2/xtrsv.hpp | 2 +- test/routines/level3/xgemm.hpp | 2 +- test/routines/level3/xhemm.hpp | 2 +- test/routines/level3/xher2k.hpp | 2 +- test/routines/level3/xherk.hpp | 2 +- test/routines/level3/xsymm.hpp | 2 +- test/routines/level3/xsyr2k.hpp | 2 +- test/routines/level3/xsyrk.hpp | 2 +- test/routines/level3/xtrmm.hpp | 2 +- test/routines/level3/xtrsm.hpp | 2 +- test/routines/levelx/xaxpybatched.hpp | 2 +- test/routines/levelx/xgemmbatched.hpp | 2 +- test/wrapper_cublas.hpp | 914 ++++++++++++--------------------- test/wrapper_cuda.hpp | 96 ++-- 50 files changed, 442 insertions(+), 677 deletions(-) (limited to 'test') diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py index 79d6b2a1..17e418e3 100644 --- a/scripts/generator/generator/cpp.py +++ b/scripts/generator/generator/cpp.py @@ -318,11 +318,9 @@ def wrapper_cublas(routine): result += " " + scalar + "_cuda.y = " + scalar + ".imag();" + NL # Calls the cuBLAS routine - result += " cublasHandle_t handle;" + NL - result += " if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; }" + NL result += " auto status = cublas" + flavour.name_cublas() + 
routine.name + "(handle, " result += ("," + NL + indent).join([a for a in arguments]) + ");" + NL - result += " cublasDestroy(handle);" + NL + result += " cudaDeviceSynchronize();" + NL result += " return status;" # There is no cuBLAS available, forward the call to one of the available functions @@ -335,11 +333,10 @@ def wrapper_cublas(routine): # result += " auto " + buf + "_buffer_bis = HalfToFloatBuffer(" + buf + "_buffer, queues[0]);" + NL # # Call the float routine - # result += " cublasHandle_t handle;" + NL - # result += " auto status = cublasX" + routine.name + "(handle," + # result += " return cublasX" + routine.name + "(handle," # result += ("," + NL + indent).join([a for a in routine.arguments_half()]) + ");" + NL - # result += " cublasDestroy(handle);" + NL - # result += " return status;" + NL + # result += " cudaDeviceSynchronize();" + NL + # result += " return status;" # # Convert back to half # for buf in routine.outputs: diff --git a/scripts/generator/generator/routine.py b/scripts/generator/generator/routine.py index a7abfde5..1c534611 100644 --- a/scripts/generator/generator/routine.py +++ b/scripts/generator/generator/routine.py @@ -884,6 +884,6 @@ class Routine: if def_only: result += flavour.name result += ">\n" - result += "cublasStatus_t cublasX" + self.name + template + "(" + result += "cublasStatus_t cublasX" + self.name + template + "(cublasHandle_t handle, " result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cublas(flavour)]) + ")" return result diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index 7aadb983..b40ec541 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -193,6 +193,9 @@ struct Arguments { size_t step = 1; size_t num_steps = 0; size_t num_runs = 10; + #ifdef CLBLAST_REF_CUBLAS + void* cublas_handle; // cublasHandle_t + #endif // Common arguments size_t platform_id = 0; size_t device_id = 0; diff --git a/test/performance/client.cpp 
b/test/performance/client.cpp index a2f0f9f4..dc98ffbd 100644 --- a/test/performance/client.cpp +++ b/test/performance/client.cpp @@ -183,7 +183,7 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) if (args.compare_clblas) { clblasSetup(); } #endif #ifdef CLBLAST_REF_CUBLAS - cudaSetDevice(static_cast(args.device_id)); + if (args.compare_cublas) { cublasSetup(args); } #endif // Iterates over all "num_step" values jumping by "step" each time @@ -272,6 +272,9 @@ void Client::PerformanceTest(Arguments &args, const SetMetric set_sizes) #ifdef CLBLAST_REF_CLBLAS if (args.compare_clblas) { clblasTeardown(); } #endif + #ifdef CLBLAST_REF_CUBLAS + if (args.compare_cublas) { cublasTeardown(args); } + #endif } // ================================================================================================= diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp index dcd48a47..04bdaa3d 100644 --- a/test/routines/level1/xamax.hpp +++ b/test/routines/level1/xamax.hpp @@ -106,7 +106,7 @@ class TestXamax { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXamax(args.n, + auto status = cublasXamax(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.imax_offset, buffers.x_vec, args.x_offset, args.x_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp index e7e41fe5..6add9c64 100644 --- a/test/routines/level1/xasum.hpp +++ b/test/routines/level1/xasum.hpp @@ -106,7 +106,7 @@ class TestXasum { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = 
cublasXasum(args.n, + auto status = cublasXasum(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.asum_offset, buffers.x_vec, args.x_offset, args.x_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp index 98f0e380..17cae6ad 100644 --- a/test/routines/level1/xaxpy.hpp +++ b/test/routines/level1/xaxpy.hpp @@ -107,7 +107,7 @@ class TestXaxpy { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXaxpy(args.n, args.alpha, + auto status = cublasXaxpy(reinterpret_cast(args.cublas_handle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp index 65e498ee..7a5c99b8 100644 --- a/test/routines/level1/xcopy.hpp +++ b/test/routines/level1/xcopy.hpp @@ -106,7 +106,7 @@ class TestXcopy { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXcopy(args.n, + auto status = cublasXcopy(reinterpret_cast(args.cublas_handle), args.n, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp index c95b16ef..1ea25994 100644 --- a/test/routines/level1/xdot.hpp +++ b/test/routines/level1/xdot.hpp @@ -113,7 +113,7 @@ class TestXdot { // Describes how to 
run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXdot(args.n, + auto status = cublasXdot(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.dot_offset, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp index 0c99be25..c800c1f5 100644 --- a/test/routines/level1/xdotc.hpp +++ b/test/routines/level1/xdotc.hpp @@ -113,7 +113,7 @@ class TestXdotc { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXdotc(args.n, + auto status = cublasXdotc(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.dot_offset, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp index bf6bcd80..3545a3a6 100644 --- a/test/routines/level1/xdotu.hpp +++ b/test/routines/level1/xdotu.hpp @@ -113,7 +113,7 @@ class TestXdotu { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXdotu(args.n, + auto status = cublasXdotu(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.dot_offset, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp index 096604d1..1db70537 100644 --- a/test/routines/level1/xnrm2.hpp +++ b/test/routines/level1/xnrm2.hpp @@ -106,7 +106,7 @@ class TestXnrm2 { // Describes how to run the cuBLAS routine (for correctness/performance 
comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXnrm2(args.n, + auto status = cublasXnrm2(reinterpret_cast(args.cublas_handle), args.n, buffers.scalar, args.nrm2_offset, buffers.x_vec, args.x_offset, args.x_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp index 09b53839..efa0988d 100644 --- a/test/routines/level1/xscal.hpp +++ b/test/routines/level1/xscal.hpp @@ -100,7 +100,7 @@ class TestXscal { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXscal(args.n, args.alpha, + auto status = cublasXscal(reinterpret_cast(args.cublas_handle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } } diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp index 0d6fe451..d778cc23 100644 --- a/test/routines/level1/xswap.hpp +++ b/test/routines/level1/xswap.hpp @@ -106,7 +106,7 @@ class TestXswap { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXswap(args.n, + auto status = cublasXswap(reinterpret_cast(args.cublas_handle), args.n, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc); if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; } diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp index 77abcfff..23138c77 100644 --- 
a/test/routines/level2/xgbmv.hpp +++ b/test/routines/level2/xgbmv.hpp @@ -126,7 +126,7 @@ class TestXgbmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXgbmv(args.layout, + auto status = cublasXgbmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.a_transpose), args.m, args.n, args.kl, args.ku, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp index c0c59152..0ee53b80 100644 --- a/test/routines/level2/xgemv.hpp +++ b/test/routines/level2/xgemv.hpp @@ -126,7 +126,7 @@ class TestXgemv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXgemv(args.layout, + auto status = cublasXgemv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.a_transpose), args.m, args.n, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp index 7fe37e1a..92a1a2ae 100644 --- a/test/routines/level2/xger.hpp +++ b/test/routines/level2/xger.hpp @@ -120,7 +120,7 @@ class TestXger { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXger(args.layout, + auto status = cublasXger(reinterpret_cast(args.cublas_handle), args.layout, args.m, args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc, diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp index b50cf672..5d899398 100644 --- a/test/routines/level2/xgerc.hpp +++ 
b/test/routines/level2/xgerc.hpp @@ -120,7 +120,7 @@ class TestXgerc { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXgerc(args.layout, + auto status = cublasXgerc(reinterpret_cast(args.cublas_handle), args.layout, args.m, args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc, diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp index 1ba83107..96dab22e 100644 --- a/test/routines/level2/xgeru.hpp +++ b/test/routines/level2/xgeru.hpp @@ -120,7 +120,7 @@ class TestXgeru { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXgeru(args.layout, + auto status = cublasXgeru(reinterpret_cast(args.cublas_handle), args.layout, args.m, args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, buffers.y_vec, args.y_offset, args.y_inc, diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp index 2faf86d9..b6844744 100644 --- a/test/routines/level2/xhbmv.hpp +++ b/test/routines/level2/xhbmv.hpp @@ -120,7 +120,7 @@ class TestXhbmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhbmv(args.layout, + auto status = cublasXhbmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.kl, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp index b2b6b337..e1f23592 100644 --- a/test/routines/level2/xhemv.hpp +++ b/test/routines/level2/xhemv.hpp @@ -120,7 +120,7 @@ 
class TestXhemv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhemv(args.layout, + auto status = cublasXhemv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp index c313d0f5..1ac1247b 100644 --- a/test/routines/level2/xher.hpp +++ b/test/routines/level2/xher.hpp @@ -113,7 +113,7 @@ class TestXher { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXher(args.layout, + auto status = cublasXher(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp index e60486a8..18ccc1ac 100644 --- a/test/routines/level2/xher2.hpp +++ b/test/routines/level2/xher2.hpp @@ -120,7 +120,7 @@ class TestXher2 { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXher2(args.layout, + auto status = cublasXher2(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp index 40ec5475..ad91fe15 100644 --- a/test/routines/level2/xhpmv.hpp +++ b/test/routines/level2/xhpmv.hpp @@ -120,7 +120,7 @@ class TestXhpmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef 
CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhpmv(args.layout, + auto status = cublasXhpmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.ap_mat, args.ap_offset, diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp index 986059bd..f9d580cd 100644 --- a/test/routines/level2/xhpr.hpp +++ b/test/routines/level2/xhpr.hpp @@ -113,7 +113,7 @@ class TestXhpr { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhpr(args.layout, + auto status = cublasXhpr(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp index 651989a4..f946ba5c 100644 --- a/test/routines/level2/xhpr2.hpp +++ b/test/routines/level2/xhpr2.hpp @@ -120,7 +120,7 @@ class TestXhpr2 { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhpr2(args.layout, + auto status = cublasXhpr2(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp index efcdbe34..6481d19b 100644 --- a/test/routines/level2/xsbmv.hpp +++ b/test/routines/level2/xsbmv.hpp @@ -120,7 +120,7 @@ class TestXsbmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto 
status = cublasXsbmv(args.layout, + auto status = cublasXsbmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.kl, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp index c7d3d348..9815dbee 100644 --- a/test/routines/level2/xspmv.hpp +++ b/test/routines/level2/xspmv.hpp @@ -120,7 +120,7 @@ class TestXspmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXspmv(args.layout, + auto status = cublasXspmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.ap_mat, args.ap_offset, diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp index 8d50074c..01a50c38 100644 --- a/test/routines/level2/xspr.hpp +++ b/test/routines/level2/xspr.hpp @@ -113,7 +113,7 @@ class TestXspr { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXspr(args.layout, + auto status = cublasXspr(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp index 2ee9538a..55f8a141 100644 --- a/test/routines/level2/xspr2.hpp +++ b/test/routines/level2/xspr2.hpp @@ -120,7 +120,7 @@ class TestXspr2 { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXspr2(args.layout, + auto status = cublasXspr2(reinterpret_cast(args.cublas_handle), 
args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp index 9411fa8d..aec0dfb0 100644 --- a/test/routines/level2/xsymv.hpp +++ b/test/routines/level2/xsymv.hpp @@ -120,7 +120,7 @@ class TestXsymv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsymv(args.layout, + auto status = cublasXsymv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.a_mat, args.a_offset, args.a_ld, diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp index 8c62f586..78b686d8 100644 --- a/test/routines/level2/xsyr.hpp +++ b/test/routines/level2/xsyr.hpp @@ -113,7 +113,7 @@ class TestXsyr { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsyr(args.layout, + auto status = cublasXsyr(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp index 80838174..38aa4f43 100644 --- a/test/routines/level2/xsyr2.hpp +++ b/test/routines/level2/xsyr2.hpp @@ -120,7 +120,7 @@ class TestXsyr2 { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsyr2(args.layout, + auto status = cublasXsyr2(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), args.n, args.alpha, buffers.x_vec, args.x_offset, args.x_inc, 
diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp index 9aff2cea..8c7aa381 100644 --- a/test/routines/level2/xtbmv.hpp +++ b/test/routines/level2/xtbmv.hpp @@ -116,7 +116,7 @@ class TestXtbmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXtbmv(args.layout, + auto status = cublasXtbmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.diagonal), diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp index e950b892..3afab978 100644 --- a/test/routines/level2/xtpmv.hpp +++ b/test/routines/level2/xtpmv.hpp @@ -116,7 +116,7 @@ class TestXtpmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXtpmv(args.layout, + auto status = cublasXtpmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.diagonal), diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp index a773b1ca..2b71f151 100644 --- a/test/routines/level2/xtrmv.hpp +++ b/test/routines/level2/xtrmv.hpp @@ -116,7 +116,7 @@ class TestXtrmv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXtrmv(args.layout, + auto status = cublasXtrmv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.diagonal), diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp index 
4428271a..85b50e85 100644 --- a/test/routines/level2/xtrsv.hpp +++ b/test/routines/level2/xtrsv.hpp @@ -131,7 +131,7 @@ class TestXtrsv { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXtrsv(args.layout, + auto status = cublasXtrsv(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.diagonal), diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp index 36fa2f43..7e0ead6d 100644 --- a/test/routines/level3/xgemm.hpp +++ b/test/routines/level3/xgemm.hpp @@ -130,7 +130,7 @@ class TestXgemm { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXgemm(args.layout, + auto status = cublasXgemm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.b_transpose), args.m, args.n, args.k, args.alpha, diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp index 9400a1fc..a89617b5 100644 --- a/test/routines/level3/xhemm.hpp +++ b/test/routines/level3/xhemm.hpp @@ -130,7 +130,7 @@ class TestXhemm { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXhemm(args.layout, + auto status = cublasXhemm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.side), convertToCUBLAS(args.triangle), args.m, args.n, args.alpha, diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp index 2b0fff64..55e6d894 100644 --- a/test/routines/level3/xher2k.hpp +++ 
b/test/routines/level3/xher2k.hpp @@ -132,7 +132,7 @@ class TestXher2k { #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { auto alpha2 = T{args.alpha, args.alpha}; - auto status = cublasXher2k(args.layout, + auto status = cublasXher2k(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), args.n, args.k, alpha2, diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp index 3a676f59..3e1e7e02 100644 --- a/test/routines/level3/xherk.hpp +++ b/test/routines/level3/xherk.hpp @@ -118,7 +118,7 @@ class TestXherk { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXherk(args.layout, + auto status = cublasXherk(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), args.n, args.k, args.alpha, diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp index 4888091b..5d840d40 100644 --- a/test/routines/level3/xsymm.hpp +++ b/test/routines/level3/xsymm.hpp @@ -130,7 +130,7 @@ class TestXsymm { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsymm(args.layout, + auto status = cublasXsymm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.side), convertToCUBLAS(args.triangle), args.m, args.n, args.alpha, diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp index 2fc4090c..4a4a2f10 100644 --- a/test/routines/level3/xsyr2k.hpp +++ b/test/routines/level3/xsyr2k.hpp @@ -128,7 +128,7 @@ class TestXsyr2k { // Describes how to run the cuBLAS routine (for correctness/performance 
comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsyr2k(args.layout, + auto status = cublasXsyr2k(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), args.n, args.k, args.alpha, diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp index 34f8f41a..90e46727 100644 --- a/test/routines/level3/xsyrk.hpp +++ b/test/routines/level3/xsyrk.hpp @@ -118,7 +118,7 @@ class TestXsyrk { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXsyrk(args.layout, + auto status = cublasXsyrk(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), args.n, args.k, args.alpha, diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp index abf77db9..acc00e01 100644 --- a/test/routines/level3/xtrmm.hpp +++ b/test/routines/level3/xtrmm.hpp @@ -122,7 +122,7 @@ class TestXtrmm { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { - auto status = cublasXtrmm(args.layout, + auto status = cublasXtrmm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.side), convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp index 10b216cc..d63c9d79 100644 --- a/test/routines/level3/xtrsm.hpp +++ b/test/routines/level3/xtrsm.hpp @@ -133,7 +133,7 @@ class TestXtrsm { // Describes how to run the cuBLAS routine (for correctness/performance comparison) #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, 
BuffersCUDA &buffers, Queue &) { - auto status = cublasXtrsm(args.layout, + auto status = cublasXtrsm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.side), convertToCUBLAS(args.triangle), convertToCUBLAS(args.a_transpose), diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp index add6c1e1..5385e86e 100644 --- a/test/routines/levelx/xaxpybatched.hpp +++ b/test/routines/levelx/xaxpybatched.hpp @@ -129,7 +129,7 @@ class TestXaxpyBatched { #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { - auto status = cublasXaxpy(args.n, args.alphas[batch], + auto status = cublasXaxpy(reinterpret_cast(args.cublas_handle), args.n, args.alphas[batch], buffers.x_vec, args.x_offsets[batch], args.x_inc, buffers.y_vec, args.y_offsets[batch], args.y_inc); if (status != CUBLAS_STATUS_SUCCESS) { return StatusCode::kUnknownError; } diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp index ae8630c0..ebfd8b19 100644 --- a/test/routines/levelx/xgemmbatched.hpp +++ b/test/routines/levelx/xgemmbatched.hpp @@ -164,7 +164,7 @@ class TestXgemmBatched { #ifdef CLBLAST_REF_CUBLAS static StatusCode RunReference3(const Arguments &args, BuffersCUDA &buffers, Queue &) { for (auto batch = size_t{0}; batch < args.batch_count; ++batch) { - auto status = cublasXgemm(args.layout, + auto status = cublasXgemm(reinterpret_cast(args.cublas_handle), args.layout, convertToCUBLAS(args.a_transpose), convertToCUBLAS(args.b_transpose), args.m, args.n, args.k, args.alphas[batch], diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp index 4de8451a..35b1b9c6 100644 --- a/test/wrapper_cublas.hpp +++ b/test/wrapper_cublas.hpp @@ -34,258 +34,226 @@ cublasSideMode_t convertToCUBLAS(const Side v) { return (v == Side::kLeft) ? 
CUB // Forwards the cuBLAS calls for SROTG/DROTG template -cublasStatus_t cublasXrotg(T* sa_buffer, const size_t sa_offset, +cublasStatus_t cublasXrotg(cublasHandle_t handle, T* sa_buffer, const size_t sa_offset, T* sb_buffer, const size_t sb_offset, T* sc_buffer, const size_t sc_offset, T* ss_buffer, const size_t ss_offset); template <> -cublasStatus_t cublasXrotg(float* sa_buffer, const size_t sa_offset, +cublasStatus_t cublasXrotg(cublasHandle_t handle, float* sa_buffer, const size_t sa_offset, float* sb_buffer, const size_t sb_offset, float* sc_buffer, const size_t sc_offset, float* ss_buffer, const size_t ss_offset) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrotg(handle, &sa_buffer[sa_offset], &sb_buffer[sb_offset], &sc_buffer[sc_offset], &ss_buffer[ss_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXrotg(double* sa_buffer, const size_t sa_offset, +cublasStatus_t cublasXrotg(cublasHandle_t handle, double* sa_buffer, const size_t sa_offset, double* sb_buffer, const size_t sb_offset, double* sc_buffer, const size_t sc_offset, double* ss_buffer, const size_t ss_offset) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotg(handle, &sa_buffer[sa_offset], &sb_buffer[sb_offset], &sc_buffer[sc_offset], &ss_buffer[ss_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SROTMG/DROTMG template -cublasStatus_t cublasXrotmg(T* sd1_buffer, const size_t sd1_offset, +cublasStatus_t cublasXrotmg(cublasHandle_t handle, T* sd1_buffer, const size_t sd1_offset, T* sd2_buffer, const size_t sd2_offset, T* sx1_buffer, const size_t sx1_offset, const T* sy1_buffer, const size_t sy1_offset, T* sparam_buffer, const size_t sparam_offset); template <> -cublasStatus_t 
cublasXrotmg(float* sd1_buffer, const size_t sd1_offset, +cublasStatus_t cublasXrotmg(cublasHandle_t handle, float* sd1_buffer, const size_t sd1_offset, float* sd2_buffer, const size_t sd2_offset, float* sx1_buffer, const size_t sx1_offset, const float* sy1_buffer, const size_t sy1_offset, float* sparam_buffer, const size_t sparam_offset) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrotmg(handle, &sd1_buffer[sd1_offset], &sd2_buffer[sd2_offset], &sx1_buffer[sx1_offset], &sy1_buffer[sy1_offset], &sparam_buffer[sparam_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXrotmg(double* sd1_buffer, const size_t sd1_offset, +cublasStatus_t cublasXrotmg(cublasHandle_t handle, double* sd1_buffer, const size_t sd1_offset, double* sd2_buffer, const size_t sd2_offset, double* sx1_buffer, const size_t sx1_offset, const double* sy1_buffer, const size_t sy1_offset, double* sparam_buffer, const size_t sparam_offset) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotmg(handle, &sd1_buffer[sd1_offset], &sd2_buffer[sd2_offset], &sx1_buffer[sx1_offset], &sy1_buffer[sy1_offset], &sparam_buffer[sparam_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SROT/DROT -cublasStatus_t cublasXrot(const size_t n, +cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n, float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc, const float cos, const float sin) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &cos, 
&sin); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXrot(const size_t n, +cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n, double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc, const double cos, const double sin) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &cos, &sin); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SROTM/DROTM template -cublasStatus_t cublasXrotm(const size_t n, +cublasStatus_t cublasXrotm(cublasHandle_t handle, const size_t n, T* x_buffer, const size_t x_offset, const size_t x_inc, T* y_buffer, const size_t y_offset, const size_t y_inc, T* sparam_buffer, const size_t sparam_offset); template <> -cublasStatus_t cublasXrotm(const size_t n, +cublasStatus_t cublasXrotm(cublasHandle_t handle, const size_t n, float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc, float* sparam_buffer, const size_t sparam_offset) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSrotm(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &sparam_buffer[sparam_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXrotm(const size_t n, +cublasStatus_t cublasXrotm(cublasHandle_t handle, const size_t n, double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc, double* sparam_buffer, const size_t sparam_offset) { - cublasHandle_t handle; - if 
(cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDrotm(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &sparam_buffer[sparam_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP template -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, T* x_buffer, const size_t x_offset, const size_t x_inc, T* y_buffer, const size_t y_offset, const size_t y_inc); template <> -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSswap(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDswap(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { 
- cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCswap(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZswap(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXswap(const size_t n, +cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n, half* x_buffer, const size_t x_offset, const size_t x_inc, half* y_buffer, const size_t y_offset, const size_t y_inc) { return CUBLAS_STATUS_NOT_SUPPORTED; } // Forwards the cuBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL -cublasStatus_t cublasXscal(const size_t n, +cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n, const float alpha, float* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSscal(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXscal(const size_t n, +cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n, const double alpha, double* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t 
handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDscal(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXscal(const size_t n, +cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n, const float2 alpha, float2* x_buffer, const size_t x_offset, const size_t x_inc) { cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCscal(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXscal(const size_t n, +cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n, const double2 alpha, double2* x_buffer, const size_t x_offset, const size_t x_inc) { cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZscal(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXscal(const size_t n, +cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n, const half alpha, half* x_buffer, const size_t x_offset, const size_t x_inc) { return CUBLAS_STATUS_NOT_SUPPORTED; @@ -293,124 +261,108 @@ cublasStatus_t cublasXscal(const size_t n, // Forwards the cuBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY template -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const T* x_buffer, const size_t x_offset, const size_t x_inc, T* y_buffer, 
const size_t y_offset, const size_t y_inc); template <> -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScopy(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDcopy(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCcopy(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const double2* x_buffer, const size_t x_offset, const size_t x_inc, 
double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZcopy(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXcopy(const size_t n, +cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n, const half* x_buffer, const size_t x_offset, const size_t x_inc, half* y_buffer, const size_t y_offset, const size_t y_inc) { return CUBLAS_STATUS_NOT_SUPPORTED; } // Forwards the cuBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY -cublasStatus_t cublasXaxpy(const size_t n, +cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSaxpy(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXaxpy(const size_t n, +cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDaxpy(handle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXaxpy(const size_t n, 
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* y_buffer, const size_t y_offset, const size_t y_inc) { cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCaxpy(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXaxpy(const size_t n, +cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* y_buffer, const size_t y_offset, const size_t y_inc) { cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZaxpy(handle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXaxpy(const size_t n, +cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, half* y_buffer, const size_t y_offset, const size_t y_inc) { @@ -419,40 +371,36 @@ cublasStatus_t cublasXaxpy(const size_t n, // Forwards the cuBLAS calls for SDOT/DDOT template -cublasStatus_t cublasXdot(const size_t n, +cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n, T* dot_buffer, const size_t dot_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc, const T* y_buffer, const 
size_t y_offset, const size_t y_inc); template <> -cublasStatus_t cublasXdot(const size_t n, +cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n, float* dot_buffer, const size_t dot_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSdot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &dot_buffer[dot_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXdot(const size_t n, +cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n, double* dot_buffer, const size_t dot_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDdot(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &dot_buffer[dot_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXdot(const size_t n, +cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n, half* dot_buffer, const size_t dot_offset, const half* x_buffer, const size_t x_offset, const size_t x_inc, const half* y_buffer, const size_t y_offset, const size_t y_inc) { @@ -461,129 +409,113 @@ cublasStatus_t cublasXdot(const size_t n, // Forwards the cuBLAS calls for CDOTU/ZDOTU template -cublasStatus_t cublasXdotu(const size_t n, +cublasStatus_t cublasXdotu(cublasHandle_t handle, const size_t n, T* dot_buffer, const size_t dot_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc, const T* y_buffer, const size_t 
y_offset, const size_t y_inc); template <> -cublasStatus_t cublasXdotu(const size_t n, +cublasStatus_t cublasXdotu(cublasHandle_t handle, const size_t n, float2* dot_buffer, const size_t dot_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCdotu(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXdotu(const size_t n, +cublasStatus_t cublasXdotu(cublasHandle_t handle, const size_t n, double2* dot_buffer, const size_t dot_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZdotu(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CDOTC/ZDOTC template -cublasStatus_t cublasXdotc(const size_t n, +cublasStatus_t cublasXdotc(cublasHandle_t handle, const size_t n, T* dot_buffer, const size_t dot_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc, const T* y_buffer, const size_t y_offset, const size_t y_inc); template <> -cublasStatus_t cublasXdotc(const size_t n, +cublasStatus_t cublasXdotc(cublasHandle_t handle, const size_t n, float2* dot_buffer, const size_t dot_offset, const float2* x_buffer, const size_t x_offset, const 
size_t x_inc, const float2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCdotc(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXdotc(const size_t n, +cublasStatus_t cublasXdotc(cublasHandle_t handle, const size_t n, double2* dot_buffer, const size_t dot_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc, const double2* y_buffer, const size_t y_offset, const size_t y_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZdotc(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&dot_buffer[dot_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2 template -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, T* nrm2_buffer, const size_t nrm2_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, float* nrm2_buffer, const size_t nrm2_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSnrm2(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &nrm2_buffer[nrm2_offset]); - cublasDestroy(handle); + 
cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, double* nrm2_buffer, const size_t nrm2_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDnrm2(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &nrm2_buffer[nrm2_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, float2* nrm2_buffer, const size_t nrm2_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScnrm2(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&nrm2_buffer[nrm2_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, double2* nrm2_buffer, const size_t nrm2_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDznrm2(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&nrm2_buffer[nrm2_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXnrm2(const size_t n, +cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n, half* nrm2_buffer, const size_t nrm2_offset, const half* x_buffer, const size_t x_offset, const size_t x_inc) { 
return CUBLAS_STATUS_NOT_SUPPORTED; @@ -591,59 +523,51 @@ cublasStatus_t cublasXnrm2(const size_t n, // Forwards the cuBLAS calls for SASUM/DASUM/ScASUM/DzASUM template -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, T* asum_buffer, const size_t asum_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, float* asum_buffer, const size_t asum_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSasum(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &asum_buffer[asum_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, double* asum_buffer, const size_t asum_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDasum(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), &asum_buffer[asum_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, float2* asum_buffer, const size_t asum_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasScasum(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&asum_buffer[asum_offset])); - 
cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, double2* asum_buffer, const size_t asum_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDzasum(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&asum_buffer[asum_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXasum(const size_t n, +cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n, half* asum_buffer, const size_t asum_offset, const half* x_buffer, const size_t x_offset, const size_t x_inc) { return CUBLAS_STATUS_NOT_SUPPORTED; @@ -651,59 +575,51 @@ cublasStatus_t cublasXasum(const size_t n, // Forwards the cuBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX template -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, T* imax_buffer, const size_t imax_offset, const T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, float* imax_buffer, const size_t imax_offset, const float* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIsamax(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), reinterpret_cast(&imax_buffer[imax_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, double* imax_buffer, const 
size_t imax_offset, const double* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIdamax(handle, static_cast(n), &x_buffer[x_offset], static_cast(x_inc), reinterpret_cast(&imax_buffer[imax_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, float2* imax_buffer, const size_t imax_offset, const float2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIcamax(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&imax_buffer[imax_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, double2* imax_buffer, const size_t imax_offset, const double2* x_buffer, const size_t x_offset, const size_t x_inc) { - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasIzamax(handle, static_cast(n), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&imax_buffer[imax_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXamax(const size_t n, +cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n, half* imax_buffer, const size_t imax_offset, const half* x_buffer, const size_t x_offset, const size_t x_inc) { return CUBLAS_STATUS_NOT_SUPPORTED; @@ -714,7 +630,7 @@ cublasStatus_t cublasXamax(const size_t n, // 
================================================================================================= // Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV -cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -722,8 +638,6 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha, @@ -731,10 +645,10 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -742,8 +656,6 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha, @@ -751,10 +663,10 @@ 
cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -768,8 +680,6 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha_cuda, @@ -777,10 +687,10 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -794,8 +704,6 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgemv(handle, a_transpose, static_cast(m), static_cast(n), &alpha_cuda, @@ -803,10 +711,10 @@ cublasStatus_t 
cublasXgemv(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -817,7 +725,7 @@ cublasStatus_t cublasXgemv(const Layout layout, const cublasOperation_t a_transp } // Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV -cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -825,8 +733,6 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, @@ -834,10 +740,10 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, 
const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -845,8 +751,6 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha, @@ -854,10 +758,10 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -871,8 +775,6 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha_cuda, @@ -880,10 +782,10 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + 
cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -897,8 +799,6 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgbmv(handle, a_transpose, static_cast(m), static_cast(n), static_cast(kl), static_cast(ku), &alpha_cuda, @@ -906,10 +806,10 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transpose, +cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -920,7 +820,7 @@ cublasStatus_t cublasXgbmv(const Layout layout, const cublasOperation_t a_transp } // Forwards the cuBLAS calls for CHEMV/ZHEMV -cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -934,8 +834,6 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t 
triangle, cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChemv(handle, triangle, static_cast(n), &alpha_cuda, @@ -943,10 +841,10 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -960,8 +858,6 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhemv(handle, triangle, static_cast(n), &alpha_cuda, @@ -969,12 +865,12 @@ cublasStatus_t cublasXhemv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHBMV/ZHBMV -cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -988,8 +884,6 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t 
triangle, cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChbmv(handle, triangle, static_cast(n), static_cast(k), &alpha_cuda, @@ -997,10 +891,10 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1014,8 +908,6 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhbmv(handle, triangle, static_cast(n), static_cast(k), &alpha_cuda, @@ -1023,12 +915,12 @@ cublasStatus_t cublasXhbmv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHPMV/ZHPMV -cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* ap_buffer, const size_t ap_offset, @@ -1042,8 +934,6 @@ cublasStatus_t cublasXhpmv(const Layout layout, const 
cublasFillMode_t triangle, cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpmv(handle, triangle, static_cast(n), &alpha_cuda, @@ -1051,10 +941,10 @@ cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* ap_buffer, const size_t ap_offset, @@ -1068,8 +958,6 @@ cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpmv(handle, triangle, static_cast(n), &alpha_cuda, @@ -1077,12 +965,12 @@ cublasStatus_t cublasXhpmv(const Layout layout, const cublasFillMode_t triangle, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), &beta_cuda, reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SSYMV/DSYMV -cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1090,8 +978,6 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, const 
float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsymv(handle, triangle, static_cast(n), &alpha, @@ -1099,10 +985,10 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1110,8 +996,6 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsymv(handle, triangle, static_cast(n), &alpha, @@ -1119,10 +1003,10 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1133,7 +1017,7 @@ cublasStatus_t cublasXsymv(const Layout layout, const cublasFillMode_t triangle, } // Forwards 
the cuBLAS calls for SSBMV/DSBMV -cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1141,8 +1025,6 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, @@ -1150,10 +1032,10 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1161,8 +1043,6 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsbmv(handle, triangle, static_cast(n), static_cast(k), &alpha, @@ -1170,10 +1050,10 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, 
&y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -1184,7 +1064,7 @@ cublasStatus_t cublasXsbmv(const Layout layout, const cublasFillMode_t triangle, } // Forwards the cuBLAS calls for SSPMV/DSPMV -cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* ap_buffer, const size_t ap_offset, @@ -1192,8 +1072,6 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, const float beta, float* y_buffer, const size_t y_offset, const size_t y_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspmv(handle, triangle, static_cast(n), &alpha, @@ -1201,10 +1079,10 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* ap_buffer, const size_t ap_offset, @@ -1212,8 +1090,6 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, const double beta, double* y_buffer, const size_t y_offset, const size_t y_inc) { 
if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspmv(handle, triangle, static_cast(n), &alpha, @@ -1221,10 +1097,10 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, &x_buffer[x_offset], static_cast(x_inc), &beta, &y_buffer[y_offset], static_cast(y_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* ap_buffer, const size_t ap_offset, @@ -1236,72 +1112,64 @@ cublasStatus_t cublasXspmv(const Layout layout, const cublasFillMode_t triangle, // Forwards the cuBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV template -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t 
handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), 
reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const half* a_buffer, const size_t a_offset, const size_t a_ld, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1310,72 +1178,64 @@ cublasStatus_t cublasXtrmv(const Layout layout, const cublasFillMode_t tri // Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV template -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const 
cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, 
&x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - 
cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const half* a_buffer, const size_t a_offset, const size_t a_ld, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1384,72 +1244,64 @@ cublasStatus_t cublasXtbmv(const Layout layout, const cublasFillMode_t tri // Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV template -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* ap_buffer, const size_t ap_offset, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* ap_buffer, const size_t ap_offset, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); - 
cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* ap_buffer, const size_t ap_offset, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* ap_buffer, const size_t ap_offset, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const 
cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* ap_buffer, const size_t ap_offset, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtpmv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const half* ap_buffer, const size_t ap_offset, half* x_buffer, const size_t x_offset, const size_t x_inc) { @@ -1458,241 +1310,213 @@ cublasStatus_t cublasXtpmv(const Layout layout, const cublasFillMode_t tri // Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV template -cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t 
a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const 
cublasDiagType_t diagonal, const size_t n, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtrsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV template -cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const T* 
a_buffer, const size_t a_offset, const size_t a_ld, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), &a_buffer[a_offset], a_ld, &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t 
cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const float2* a_buffer, const size_t a_offset, const size_t a_ld, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtbsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const size_t k, const double2* a_buffer, const size_t a_offset, const size_t a_ld, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal, static_cast(n), static_cast(k), reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV template -cublasStatus_t 
cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const T* ap_buffer, const size_t ap_offset, T* x_buffer, const size_t x_offset, const size_t x_inc); template <> -cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float* ap_buffer, const size_t ap_offset, float* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStpsv(handle, triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double* ap_buffer, const size_t ap_offset, double* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtpsv(handle, 
triangle, a_transpose, diagonal, static_cast(n), &ap_buffer[ap_offset], &x_buffer[x_offset], static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const float2* ap_buffer, const size_t ap_offset, float2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } template <> -cublasStatus_t cublasXtpsv(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t n, const double2* ap_buffer, const size_t ap_offset, double2* x_buffer, const size_t x_offset, const size_t x_inc) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtpsv(handle, triangle, a_transpose, diagonal, static_cast(n), reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); - cublasDestroy(handle); + cudaDeviceSynchronize(); 
return status; } // Forwards the cuBLAS calls for SGER/DGER -cublasStatus_t cublasXger(const Layout layout, +cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXger(const Layout layout, +cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDger(handle, static_cast(m), static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXger(const Layout layout, +cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1702,7 +1526,7 @@ cublasStatus_t cublasXger(const Layout layout, } // Forwards 
the cuBLAS calls for CGERU/ZGERU -cublasStatus_t cublasXgeru(const Layout layout, +cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1712,17 +1536,15 @@ cublasStatus_t cublasXgeru(const Layout layout, cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgeru(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgeru(const Layout layout, +cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1732,19 +1554,17 @@ cublasStatus_t cublasXgeru(const Layout layout, cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgeru(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CGERC/ZGERC -cublasStatus_t cublasXgerc(const Layout layout, +cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1754,17 +1574,15 @@ 
cublasStatus_t cublasXgerc(const Layout layout, cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgerc(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgerc(const Layout layout, +cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout, const size_t m, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1774,87 +1592,77 @@ cublasStatus_t cublasXgerc(const Layout layout, cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgerc(handle, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHER/ZHER -cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = 
cublasCher(handle, triangle, static_cast(n), &alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXher(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher(handle, triangle, static_cast(n), &alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHPR/ZHPR -cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, float2* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpr(handle, triangle, static_cast(n), &alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&ap_buffer[ap_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhpr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t 
triangle, const size_t n, const double alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, double2* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpr(handle, triangle, static_cast(n), &alpha, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&ap_buffer[ap_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHER2/ZHER2 -cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1864,18 +1672,16 @@ cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCher2(handle, triangle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1885,20 +1691,18 @@ cublasStatus_t cublasXher2(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); 
alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher2(handle, triangle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&a_buffer[a_offset]), a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for CHPR2/ZHPR2 -cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float2 alpha, const float2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1908,18 +1712,16 @@ cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChpr2(handle, triangle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&ap_buffer[ap_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double2 alpha, const double2* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1929,52 +1731,46 @@ cublasStatus_t cublasXhpr2(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return 
CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhpr2(handle, triangle, static_cast(n), &alpha_cuda, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc), reinterpret_cast(&ap_buffer[ap_offset])); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SSYR/DSYR -cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr(cublasHandle_t 
handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -1983,39 +1779,35 @@ cublasStatus_t cublasXsyr(const Layout layout, const cublasFillMode_t triangle, } // Forwards the cuBLAS calls for SSPR/DSPR -cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, float* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &ap_buffer[ap_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, double* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspr(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &ap_buffer[ap_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* 
x_buffer, const size_t x_offset, const size_t x_inc, @@ -2024,43 +1816,39 @@ cublasStatus_t cublasXspr(const Layout layout, const cublasFillMode_t triangle, } // Forwards the cuBLAS calls for SSYR2/DSYR2 -cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc, float* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* a_buffer, const size_t a_offset, const size_t a_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t 
triangle, +cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -2070,43 +1858,39 @@ cublasStatus_t cublasXsyr2(const Layout layout, const cublasFillMode_t triangle, } // Forwards the cuBLAS calls for SSPR2/DSPR2 -cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const float alpha, const float* x_buffer, const size_t x_offset, const size_t x_inc, const float* y_buffer, const size_t y_offset, const size_t y_inc, float* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSspr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &ap_buffer[ap_offset]); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const double alpha, const double* x_buffer, const size_t x_offset, const size_t x_inc, const double* y_buffer, const size_t y_offset, const size_t y_inc, double* ap_buffer, const size_t ap_offset) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDspr2(handle, triangle, static_cast(n), &alpha, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc), &ap_buffer[ap_offset]); - cublasDestroy(handle); + 
cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, +cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const size_t n, const half alpha, const half* x_buffer, const size_t x_offset, const size_t x_inc, @@ -2120,7 +1904,7 @@ cublasStatus_t cublasXspr2(const Layout layout, const cublasFillMode_t triangle, // ================================================================================================= // Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM -cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2128,8 +1912,6 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, @@ -2137,10 +1919,10 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t 
m, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2148,8 +1930,6 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha, @@ -2157,10 +1937,10 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2174,8 +1954,6 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha_cuda, @@ -2183,10 +1961,10 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemm(const Layout layout, 
const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2200,8 +1978,6 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZgemm(handle, a_transpose, b_transpose, static_cast(m), static_cast(n), static_cast(k), &alpha_cuda, @@ -2209,10 +1985,10 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, +cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose, const size_t m, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2223,7 +1999,7 @@ cublasStatus_t cublasXgemm(const Layout layout, const cublasOperation_t a_transp } // Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM -cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2231,8 +2007,6 
@@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha, @@ -2240,10 +2014,10 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2251,8 +2025,6 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha, @@ -2260,10 +2032,10 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, 
const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2277,8 +2049,6 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2286,10 +2056,10 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2303,8 +2073,6 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsymm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2312,10 +2080,10 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXsymm(cublasHandle_t handle, const 
Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2326,7 +2094,7 @@ cublasStatus_t cublasXsymm(const Layout layout, const cublasSideMode_t side, con } // Forwards the cuBLAS calls for CHEMM/ZHEMM -cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2340,8 +2108,6 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasChemm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2349,10 +2115,10 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, +cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2366,8 +2132,6 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != 
CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZhemm(handle, side, triangle, static_cast(m), static_cast(n), &alpha_cuda, @@ -2375,48 +2139,44 @@ cublasStatus_t cublasXhemm(const Layout layout, const cublasSideMode_t side, con reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK -cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return 
CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, &a_buffer[a_offset], a_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2429,18 +2189,16 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2453,18 +2211,16 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsyrk(handle, triangle, a_transpose, static_cast(n), static_cast(k), 
&alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2474,45 +2230,41 @@ cublasStatus_t cublasXsyrk(const Layout layout, const cublasFillMode_t triangle, } // Forwards the cuBLAS calls for CHERK/ZHERK -cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const float alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, const float beta, float2* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXherk(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, +cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const size_t n, const size_t k, const double alpha, const double2* a_buffer, const size_t a_offset, const 
size_t a_ld, const double beta, double2* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZherk(handle, triangle, a_transpose, static_cast(n), static_cast(k), &alpha, reinterpret_cast(&a_buffer[a_offset]), a_ld, &beta, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K -cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2520,8 +2272,6 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle const float beta, float* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasSsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, @@ -2529,10 +2279,10 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t 
k, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2540,8 +2290,6 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle const double beta, double* c_buffer, const size_t c_offset, const size_t c_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha, @@ -2549,10 +2297,10 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle &b_buffer[b_offset], b_ld, &beta, &c_buffer[c_offset], c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2566,8 +2314,6 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle cuComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2575,10 +2321,10 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, 
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2592,8 +2338,6 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle cuDoubleComplex beta_cuda; beta_cuda.x = beta.real(); beta_cuda.y = beta.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZsyr2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2601,10 +2345,10 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta_cuda, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2615,7 +2359,7 @@ cublasStatus_t cublasXsyr2k(const Layout layout, const cublasFillMode_t triangle } // Forwards the cuBLAS calls for CHER2K/ZHER2K -cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2626,8 +2370,6 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle cuComplex alpha_cuda; 
alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2635,10 +2377,10 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, +cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t ab_transpose, const size_t n, const size_t k, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2649,8 +2391,6 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZher2k(handle, triangle, ab_transpose, static_cast(n), static_cast(k), &alpha_cuda, @@ -2658,46 +2398,42 @@ cublasStatus_t cublasXher2k(const Layout layout, const cublasFillMode_t triangle reinterpret_cast(&b_buffer[b_offset]), b_ld, &beta, reinterpret_cast(&c_buffer[c_offset]), c_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } // Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM -cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, 
const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const 
cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2706,18 +2442,16 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2726,18 +2460,16 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrmm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); 
return status; } -cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const half alpha, const half* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2746,39 +2478,35 @@ cublasStatus_t cublasXtrmm(const Layout layout, const cublasSideMode_t side, con } // Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM -cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float alpha, const float* a_buffer, const size_t a_offset, const size_t a_ld, float* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasStrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const 
cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double alpha, const double* a_buffer, const size_t a_offset, const size_t a_ld, double* b_buffer, const size_t b_offset, const size_t b_ld) { if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; } - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasDtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha, &a_buffer[a_offset], a_ld, &b_buffer[b_offset], b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const float2 alpha, const float2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2787,17 +2515,15 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con cuComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasCtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } -cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, +cublasStatus_t cublasXtrsm(cublasHandle_t handle, const 
Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal, const size_t m, const size_t n, const double2 alpha, const double2* a_buffer, const size_t a_offset, const size_t a_ld, @@ -2806,14 +2532,12 @@ cublasStatus_t cublasXtrsm(const Layout layout, const cublasSideMode_t side, con cuDoubleComplex alpha_cuda; alpha_cuda.x = alpha.real(); alpha_cuda.y = alpha.imag(); - cublasHandle_t handle; - if (cublasCreate(&handle) != CUBLAS_STATUS_SUCCESS) { return CUBLAS_STATUS_NOT_INITIALIZED; } auto status = cublasZtrsm(handle, side, triangle, a_transpose, diagonal, static_cast(m), static_cast(n), &alpha_cuda, reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); - cublasDestroy(handle); + cudaDeviceSynchronize(); return status; } diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp index 509de9d1..51f897c4 100644 --- a/test/wrapper_cuda.hpp +++ b/test/wrapper_cuda.hpp @@ -29,17 +29,47 @@ namespace clblast { // ================================================================================================= +#ifdef CLBLAST_REF_CUBLAS + template + void cublasSetup(Arguments &args) { + cudaSetDevice(static_cast(args.device_id)); + auto status = cublasCreate(reinterpret_cast(&args.cublas_handle)); + if (status != CUBLAS_STATUS_SUCCESS) { + throw std::runtime_error("CUDA cublasCreate error"); + } + } +#endif + +#ifdef CLBLAST_REF_CUBLAS + template + void cublasTeardown(Arguments &args) { + auto status = cublasDestroy(reinterpret_cast(args.cublas_handle)); + if (status != CUBLAS_STATUS_SUCCESS) { + throw std::runtime_error("CUDA cublasDestroy error"); + } + } +#endif + +// ================================================================================================= + // Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards #ifdef CLBLAST_REF_CUBLAS template - void CUDAToHost(T* buffer_cuda, std::vector &buffer_host, 
const size_t size) { - cudaMemcpy( + void CUDAToHost(T** buffer_cuda, std::vector &buffer_host, const size_t size) { + auto status1 = cudaMemcpy( reinterpret_cast(buffer_host.data()), - reinterpret_cast(buffer_cuda), + reinterpret_cast(*buffer_cuda), size*sizeof(T), cudaMemcpyDeviceToHost ); - cudaFree(buffer_cuda); + if (status1 != cudaSuccess) { + throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast(status1))); + } + auto status2 = cudaFree(*buffer_cuda); + if (status2 != cudaSuccess) { + throw std::runtime_error("CUDA cudaFree error with status: "+ToString(static_cast(status2))); + } + *buffer_cuda = nullptr; } #else template void CUDAToHost(T*, const std::vector&, const size_t) { } @@ -48,14 +78,22 @@ namespace clblast { // Allocates space on the CUDA device and copies in data from the host #ifdef CLBLAST_REF_CUBLAS template - void HostToCUDA(T* buffer_cuda, std::vector &buffer_host, const size_t size) { - cudaMalloc(reinterpret_cast(&buffer_cuda), size*sizeof(T)); - cudaMemcpy( - reinterpret_cast(buffer_cuda), + void HostToCUDA(T** buffer_cuda, std::vector &buffer_host, const size_t size) { + if (*buffer_cuda == nullptr) { + auto status1 = cudaMalloc(reinterpret_cast(buffer_cuda), size*sizeof(T)); + if (status1 != cudaSuccess) { + throw std::runtime_error("CUDA cudaMalloc error with status: "+ToString(static_cast(status1))); + } + } + auto status2 = cudaMemcpy( + reinterpret_cast(*buffer_cuda), reinterpret_cast(buffer_host.data()), size*sizeof(T), cudaMemcpyHostToDevice ); + if (status2 != cudaSuccess) { + throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast(status2))); + } } #else template void HostToCUDA(T*, const std::vector&, const size_t) { } @@ -65,26 +103,26 @@ namespace clblast { template struct BuffersCUDA { - T* x_vec; - T* y_vec; - T* a_mat; - T* b_mat; - T* c_mat; - T* ap_mat; - T* scalar; + T* x_vec = nullptr; + T* y_vec = nullptr; + T* a_mat = nullptr; + T* b_mat = nullptr; + T* c_mat 
= nullptr; + T* ap_mat = nullptr; + T* scalar = nullptr; }; template void CUDAToHost(const Arguments &args, BuffersCUDA &buffers, BuffersHost &buffers_host, const std::vector &names) { for (auto &name: names) { - if (name == kBufVecX) { buffers_host.x_vec = std::vector(args.x_size, static_cast(0)); CUDAToHost(buffers.x_vec, buffers_host.x_vec, args.x_size); } - else if (name == kBufVecY) { buffers_host.y_vec = std::vector(args.y_size, static_cast(0)); CUDAToHost(buffers.y_vec, buffers_host.y_vec, args.y_size); } - else if (name == kBufMatA) { buffers_host.a_mat = std::vector(args.a_size, static_cast(0)); CUDAToHost(buffers.a_mat, buffers_host.a_mat, args.a_size); } - else if (name == kBufMatB) { buffers_host.b_mat = std::vector(args.b_size, static_cast(0)); CUDAToHost(buffers.b_mat, buffers_host.b_mat, args.b_size); } - else if (name == kBufMatC) { buffers_host.c_mat = std::vector(args.c_size, static_cast(0)); CUDAToHost(buffers.c_mat, buffers_host.c_mat, args.c_size); } - else if (name == kBufMatAP) { buffers_host.ap_mat = std::vector(args.ap_size, static_cast(0)); CUDAToHost(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } - else if (name == kBufScalar) { buffers_host.scalar = std::vector(args.scalar_size, static_cast(0)); CUDAToHost(buffers.scalar, buffers_host.scalar, args.scalar_size); } + if (name == kBufVecX) { buffers_host.x_vec = std::vector(args.x_size, static_cast(0)); CUDAToHost(&buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { buffers_host.y_vec = std::vector(args.y_size, static_cast(0)); CUDAToHost(&buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { buffers_host.a_mat = std::vector(args.a_size, static_cast(0)); CUDAToHost(&buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { buffers_host.b_mat = std::vector(args.b_size, static_cast(0)); CUDAToHost(&buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { buffers_host.c_mat = 
std::vector(args.c_size, static_cast(0)); CUDAToHost(&buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { buffers_host.ap_mat = std::vector(args.ap_size, static_cast(0)); CUDAToHost(&buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } + else if (name == kBufScalar) { buffers_host.scalar = std::vector(args.scalar_size, static_cast(0)); CUDAToHost(&buffers.scalar, buffers_host.scalar, args.scalar_size); } else { throw std::runtime_error("Invalid buffer name"); } } } @@ -93,13 +131,13 @@ template void HostToCUDA(const Arguments &args, BuffersCUDA &buffers, BuffersHost &buffers_host, const std::vector &names) { for (auto &name: names) { - if (name == kBufVecX) { HostToCUDA(buffers.x_vec, buffers_host.x_vec, args.x_size); } - else if (name == kBufVecY) { HostToCUDA(buffers.y_vec, buffers_host.y_vec, args.y_size); } - else if (name == kBufMatA) { HostToCUDA(buffers.a_mat, buffers_host.a_mat, args.a_size); } - else if (name == kBufMatB) { HostToCUDA(buffers.b_mat, buffers_host.b_mat, args.b_size); } - else if (name == kBufMatC) { HostToCUDA(buffers.c_mat, buffers_host.c_mat, args.c_size); } - else if (name == kBufMatAP) { HostToCUDA(buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } - else if (name == kBufScalar) { HostToCUDA(buffers.scalar, buffers_host.scalar, args.scalar_size); } + if (name == kBufVecX) { HostToCUDA(&buffers.x_vec, buffers_host.x_vec, args.x_size); } + else if (name == kBufVecY) { HostToCUDA(&buffers.y_vec, buffers_host.y_vec, args.y_size); } + else if (name == kBufMatA) { HostToCUDA(&buffers.a_mat, buffers_host.a_mat, args.a_size); } + else if (name == kBufMatB) { HostToCUDA(&buffers.b_mat, buffers_host.b_mat, args.b_size); } + else if (name == kBufMatC) { HostToCUDA(&buffers.c_mat, buffers_host.c_mat, args.c_size); } + else if (name == kBufMatAP) { HostToCUDA(&buffers.ap_mat, buffers_host.ap_mat, args.ap_size); } + else if (name == kBufScalar) { HostToCUDA(&buffers.scalar, buffers_host.scalar, args.scalar_size); } 
else { throw std::runtime_error("Invalid buffer name"); } } } -- cgit v1.2.3 From e3bb58f60277e70a26b2cef782945027871135d5 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 16 Apr 2017 17:53:51 +0200 Subject: Finalized support for performance testing against cuBLAS --- CHANGELOG | 1 + CMakeLists.txt | 11 +++++++---- README.md | 2 +- test/wrapper_cuda.hpp | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'test') diff --git a/CHANGELOG b/CHANGELOG index 0b4e9951..6643cc32 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ Development version (next release) - Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM - Tests now also exit with an error code when OpenCL errors or compilation errors occur - Tests now also check for the L2 error in case of half-precision +- Clients can now test against cuBLAS on NVIDIA systems for performance comparisons (-DCUBLAS=ON) - Replaced the R graph scripts with Python/Matplotlib scripts - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0fb04071..b26de79a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ option(TUNERS "Enable compilation of the tuners" OFF) option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF) option(TESTS "Enable compilation of the correctness tests" OFF) option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF) +option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF) # Compile in verbose mode with additional diagnostic messages option(VERBOSE "Compile in verbose mode for additional diagnostic messages" OFF) @@ -134,14 +135,16 @@ endif() if(CLIENTS OR TESTS) find_package(clBLAS) find_package(CBLAS) - find_package(cuBLAS) - if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND AND NOT CUBLAS_FOUND) + if(CUBLAS) + find_package(cuBLAS) + endif() + if(NOT CLBLAS_FOUND AND NOT CBLAS_FOUND) if(TESTS) - 
message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, disabling the compilation of the tests") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS, disabling the compilation of the tests") set(TESTS OFF) endif() if(CLIENTS) - message(STATUS "Could NOT find clBLAS nor a CPU BLAS nor cuBLAS, head-to-head performance comparison not supported in the clients") + message(STATUS "Could NOT find clBLAS nor a CPU BLAS, head-to-head performance comparison not supported in the clients") endif() endif() endif() diff --git a/README.md b/README.md index 3109b4bf..835f5eea 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ All tests can be run directly together in one go through the `make alltests` tar Compiling the performance tests/clients (optional) ------------- -To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS) or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows: +To test the performance of CLBlast and compare optionally against [clBLAS](http://github.com/clMathLibraries/clBLAS), cuBLAS (if testing on an NVIDIA GPU and `-DCUBLAS=ON` set), or a CPU BLAS library (see above for requirements), compile with the clients enabled by specifying `-DCLIENTS=ON`, for example as follows: cmake -DCLIENTS=ON .. 
diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp index 51f897c4..c97ae3ef 100644 --- a/test/wrapper_cuda.hpp +++ b/test/wrapper_cuda.hpp @@ -72,7 +72,7 @@ namespace clblast { *buffer_cuda = nullptr; } #else - template <typename T> void CUDAToHost(T*, const std::vector<T>&, const size_t) { } + template <typename T> void CUDAToHost(T**, const std::vector<T>&, const size_t) { } #endif // Allocates space on the CUDA device and copies in data from the host @@ -96,7 +96,7 @@ namespace clblast { } } #else - template <typename T> void HostToCUDA(T*, const std::vector<T>&, const size_t) { } + template <typename T> void HostToCUDA(T**, const std::vector<T>&, const size_t) { } #endif // ================================================================================================= -- cgit v1.2.3