summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-04-16 19:41:14 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2017-04-16 19:41:14 +0200
commit2673f5051820db82ebb857d88c2f36f7cacbed7d (patch)
treef3323af174bde2793b3c4692f3404d2a18c5eadb /test
parent063ef729e123aa2cebc7f67c73f99f3e15606fe2 (diff)
parentb20c518f9fd05a69957c2018e72c6a648f5cdb7d (diff)
Merge branch 'development' into benchmarking
Diffstat (limited to 'test')
-rw-r--r--test/correctness/misc/override_parameters.cpp6
-rw-r--r--test/correctness/routines/level1/xamax.cpp8
-rw-r--r--test/correctness/routines/level1/xasum.cpp8
-rw-r--r--test/correctness/routines/level1/xaxpy.cpp8
-rw-r--r--test/correctness/routines/level1/xcopy.cpp8
-rw-r--r--test/correctness/routines/level1/xdot.cpp4
-rw-r--r--test/correctness/routines/level1/xdotc.cpp8
-rw-r--r--test/correctness/routines/level1/xdotu.cpp8
-rw-r--r--test/correctness/routines/level1/xnrm2.cpp8
-rw-r--r--test/correctness/routines/level1/xrot.cpp4
-rw-r--r--test/correctness/routines/level1/xrotg.cpp4
-rw-r--r--test/correctness/routines/level1/xrotm.cpp4
-rw-r--r--test/correctness/routines/level1/xrotmg.cpp4
-rw-r--r--test/correctness/routines/level1/xscal.cpp8
-rw-r--r--test/correctness/routines/level1/xswap.cpp8
-rw-r--r--test/correctness/routines/level2/xgbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xgemv.cpp8
-rw-r--r--test/correctness/routines/level2/xger.cpp4
-rw-r--r--test/correctness/routines/level2/xgerc.cpp8
-rw-r--r--test/correctness/routines/level2/xgeru.cpp8
-rw-r--r--test/correctness/routines/level2/xhbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xhemv.cpp8
-rw-r--r--test/correctness/routines/level2/xher.cpp8
-rw-r--r--test/correctness/routines/level2/xher2.cpp8
-rw-r--r--test/correctness/routines/level2/xhpmv.cpp8
-rw-r--r--test/correctness/routines/level2/xhpr.cpp8
-rw-r--r--test/correctness/routines/level2/xhpr2.cpp8
-rw-r--r--test/correctness/routines/level2/xsbmv.cpp4
-rw-r--r--test/correctness/routines/level2/xspmv.cpp4
-rw-r--r--test/correctness/routines/level2/xspr.cpp4
-rw-r--r--test/correctness/routines/level2/xspr2.cpp4
-rw-r--r--test/correctness/routines/level2/xsymv.cpp4
-rw-r--r--test/correctness/routines/level2/xsyr.cpp4
-rw-r--r--test/correctness/routines/level2/xsyr2.cpp4
-rw-r--r--test/correctness/routines/level2/xtbmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtbsv.cpp8
-rw-r--r--test/correctness/routines/level2/xtpmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtpsv.cpp8
-rw-r--r--test/correctness/routines/level2/xtrmv.cpp8
-rw-r--r--test/correctness/routines/level2/xtrsv.cpp8
-rw-r--r--test/correctness/routines/level3/xgemm.cpp8
-rw-r--r--test/correctness/routines/level3/xhemm.cpp8
-rw-r--r--test/correctness/routines/level3/xher2k.cpp8
-rw-r--r--test/correctness/routines/level3/xherk.cpp8
-rw-r--r--test/correctness/routines/level3/xsymm.cpp8
-rw-r--r--test/correctness/routines/level3/xsyr2k.cpp8
-rw-r--r--test/correctness/routines/level3/xsyrk.cpp8
-rw-r--r--test/correctness/routines/level3/xtrmm.cpp8
-rw-r--r--test/correctness/routines/level3/xtrsm.cpp8
-rw-r--r--test/correctness/routines/levelx/xaxpybatched.cpp8
-rw-r--r--test/correctness/routines/levelx/xgemmbatched.cpp8
-rw-r--r--test/correctness/routines/levelx/xomatcopy.cpp8
-rw-r--r--test/correctness/tester.cpp44
-rw-r--r--test/correctness/tester.hpp1
-rw-r--r--test/performance/client.cpp38
-rw-r--r--test/performance/client.hpp12
-rw-r--r--test/performance/routines/level1/xamax.cpp8
-rw-r--r--test/performance/routines/level1/xasum.cpp8
-rw-r--r--test/performance/routines/level1/xaxpy.cpp8
-rw-r--r--test/performance/routines/level1/xcopy.cpp8
-rw-r--r--test/performance/routines/level1/xdot.cpp4
-rw-r--r--test/performance/routines/level1/xdotc.cpp8
-rw-r--r--test/performance/routines/level1/xdotu.cpp8
-rw-r--r--test/performance/routines/level1/xnrm2.cpp8
-rw-r--r--test/performance/routines/level1/xrot.cpp4
-rw-r--r--test/performance/routines/level1/xrotg.cpp4
-rw-r--r--test/performance/routines/level1/xrotm.cpp4
-rw-r--r--test/performance/routines/level1/xrotmg.cpp4
-rw-r--r--test/performance/routines/level1/xscal.cpp8
-rw-r--r--test/performance/routines/level1/xswap.cpp8
-rw-r--r--test/performance/routines/level2/xgbmv.cpp8
-rw-r--r--test/performance/routines/level2/xgemv.cpp8
-rw-r--r--test/performance/routines/level2/xger.cpp4
-rw-r--r--test/performance/routines/level2/xgerc.cpp8
-rw-r--r--test/performance/routines/level2/xgeru.cpp8
-rw-r--r--test/performance/routines/level2/xhbmv.cpp8
-rw-r--r--test/performance/routines/level2/xhemv.cpp8
-rw-r--r--test/performance/routines/level2/xher.cpp8
-rw-r--r--test/performance/routines/level2/xher2.cpp8
-rw-r--r--test/performance/routines/level2/xhpmv.cpp8
-rw-r--r--test/performance/routines/level2/xhpr.cpp8
-rw-r--r--test/performance/routines/level2/xhpr2.cpp8
-rw-r--r--test/performance/routines/level2/xsbmv.cpp4
-rw-r--r--test/performance/routines/level2/xspmv.cpp4
-rw-r--r--test/performance/routines/level2/xspr.cpp4
-rw-r--r--test/performance/routines/level2/xspr2.cpp4
-rw-r--r--test/performance/routines/level2/xsymv.cpp4
-rw-r--r--test/performance/routines/level2/xsyr.cpp4
-rw-r--r--test/performance/routines/level2/xsyr2.cpp4
-rw-r--r--test/performance/routines/level2/xtbmv.cpp8
-rw-r--r--test/performance/routines/level2/xtbsv.cpp8
-rw-r--r--test/performance/routines/level2/xtpmv.cpp8
-rw-r--r--test/performance/routines/level2/xtpsv.cpp8
-rw-r--r--test/performance/routines/level2/xtrmv.cpp8
-rw-r--r--test/performance/routines/level2/xtrsv.cpp8
-rw-r--r--test/performance/routines/level3/xgemm.cpp8
-rw-r--r--test/performance/routines/level3/xhemm.cpp8
-rw-r--r--test/performance/routines/level3/xher2k.cpp8
-rw-r--r--test/performance/routines/level3/xherk.cpp8
-rw-r--r--test/performance/routines/level3/xsymm.cpp8
-rw-r--r--test/performance/routines/level3/xsyr2k.cpp8
-rw-r--r--test/performance/routines/level3/xsyrk.cpp8
-rw-r--r--test/performance/routines/level3/xtrmm.cpp8
-rw-r--r--test/performance/routines/level3/xtrsm.cpp8
-rw-r--r--test/performance/routines/levelx/xaxpybatched.cpp8
-rw-r--r--test/performance/routines/levelx/xgemmbatched.cpp8
-rw-r--r--test/performance/routines/levelx/xomatcopy.cpp8
-rw-r--r--test/routines/common.hpp36
-rw-r--r--test/routines/level1/xamax.hpp20
-rw-r--r--test/routines/level1/xasum.hpp20
-rw-r--r--test/routines/level1/xaxpy.hpp20
-rw-r--r--test/routines/level1/xcopy.hpp20
-rw-r--r--test/routines/level1/xdot.hpp21
-rw-r--r--test/routines/level1/xdotc.hpp21
-rw-r--r--test/routines/level1/xdotu.hpp21
-rw-r--r--test/routines/level1/xnrm2.hpp20
-rw-r--r--test/routines/level1/xscal.hpp19
-rw-r--r--test/routines/level1/xswap.hpp20
-rw-r--r--test/routines/level2/xgbmv.hpp23
-rw-r--r--test/routines/level2/xgemv.hpp23
-rw-r--r--test/routines/level2/xger.hpp22
-rw-r--r--test/routines/level2/xgerc.hpp22
-rw-r--r--test/routines/level2/xgeru.hpp22
-rw-r--r--test/routines/level2/xhbmv.hpp23
-rw-r--r--test/routines/level2/xhemv.hpp23
-rw-r--r--test/routines/level2/xher.hpp22
-rw-r--r--test/routines/level2/xher2.hpp23
-rw-r--r--test/routines/level2/xhpmv.hpp23
-rw-r--r--test/routines/level2/xhpr.hpp22
-rw-r--r--test/routines/level2/xhpr2.hpp23
-rw-r--r--test/routines/level2/xsbmv.hpp23
-rw-r--r--test/routines/level2/xspmv.hpp23
-rw-r--r--test/routines/level2/xspr.hpp22
-rw-r--r--test/routines/level2/xspr2.hpp23
-rw-r--r--test/routines/level2/xsymv.hpp23
-rw-r--r--test/routines/level2/xsyr.hpp22
-rw-r--r--test/routines/level2/xsyr2.hpp23
-rw-r--r--test/routines/level2/xtbmv.hpp24
-rw-r--r--test/routines/level2/xtpmv.hpp24
-rw-r--r--test/routines/level2/xtrmv.hpp24
-rw-r--r--test/routines/level2/xtrsv.hpp24
-rw-r--r--test/routines/level3/xgemm.hpp24
-rw-r--r--test/routines/level3/xhemm.hpp24
-rw-r--r--test/routines/level3/xher2k.hpp25
-rw-r--r--test/routines/level3/xherk.hpp23
-rw-r--r--test/routines/level3/xsymm.hpp24
-rw-r--r--test/routines/level3/xsyr2k.hpp24
-rw-r--r--test/routines/level3/xsyrk.hpp23
-rw-r--r--test/routines/level3/xtrmm.hpp25
-rw-r--r--test/routines/level3/xtrsm.hpp26
-rw-r--r--test/routines/levelx/xaxpybatched.hpp25
-rw-r--r--test/routines/levelx/xgemmbatched.hpp27
-rw-r--r--test/routines/levelx/xinvert.hpp8
-rw-r--r--test/routines/levelx/xomatcopy.hpp6
-rw-r--r--test/wrapper_cblas.hpp268
-rw-r--r--test/wrapper_cublas.hpp2548
-rw-r--r--test/wrapper_cuda.hpp149
157 files changed, 3702 insertions, 1124 deletions
diff --git a/test/correctness/misc/override_parameters.cpp b/test/correctness/misc/override_parameters.cpp
index e6eebef7..4283c039 100644
--- a/test/correctness/misc/override_parameters.cpp
+++ b/test/correctness/misc/override_parameters.cpp
@@ -129,15 +129,11 @@ size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::st
// =================================================================================================
} // namespace clblast
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunOverrideTests<float>(argc, argv, false, "SGEMM");
- errors += clblast::RunOverrideTests<float2>(argc, argv, true, "CGEMM");
+ errors += clblast::RunOverrideTests<clblast::float2>(argc, argv, true, "CGEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xamax.cpp b/test/correctness/routines/level1/xamax.cpp
index 607637e8..d940ae7a 100644
--- a/test/correctness/routines/level1/xamax.cpp
+++ b/test/correctness/routines/level1/xamax.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xamax.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX");
errors += clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX");
- errors += clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX");
- errors += clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX");
+ errors += clblast::RunTests<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "iCAMAX");
+ errors += clblast::RunTests<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "iZAMAX");
errors += clblast::RunTests<clblast::TestXamax<half>, half, half>(argc, argv, true, "iHAMAX");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xasum.cpp b/test/correctness/routines/level1/xasum.cpp
index e22e42a6..b969d662 100644
--- a/test/correctness/routines/level1/xasum.cpp
+++ b/test/correctness/routines/level1/xasum.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xasum.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXasum<float>, float, float>(argc, argv, false, "SASUM");
errors += clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM");
- errors += clblast::RunTests<clblast::TestXasum<float2>, float2, float2>(argc, argv, true, "ScASUM");
- errors += clblast::RunTests<clblast::TestXasum<double2>, double2, double2>(argc, argv, true, "DzASUM");
+ errors += clblast::RunTests<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScASUM");
+ errors += clblast::RunTests<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzASUM");
errors += clblast::RunTests<clblast::TestXasum<half>, half, half>(argc, argv, true, "HASUM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xaxpy.cpp b/test/correctness/routines/level1/xaxpy.cpp
index 064172fa..6f4f34fb 100644
--- a/test/correctness/routines/level1/xaxpy.cpp
+++ b/test/correctness/routines/level1/xaxpy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xaxpy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXaxpy<float>, float, float>(argc, argv, false, "SAXPY");
errors += clblast::RunTests<clblast::TestXaxpy<double>, double, double>(argc, argv, true, "DAXPY");
- errors += clblast::RunTests<clblast::TestXaxpy<float2>, float2, float2>(argc, argv, true, "CAXPY");
- errors += clblast::RunTests<clblast::TestXaxpy<double2>, double2, double2>(argc, argv, true, "ZAXPY");
+ errors += clblast::RunTests<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPY");
+ errors += clblast::RunTests<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPY");
errors += clblast::RunTests<clblast::TestXaxpy<half>, half, half>(argc, argv, true, "HAXPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xcopy.cpp b/test/correctness/routines/level1/xcopy.cpp
index e6f2581b..e6e94d34 100644
--- a/test/correctness/routines/level1/xcopy.cpp
+++ b/test/correctness/routines/level1/xcopy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXcopy<float>, float, float>(argc, argv, false, "SCOPY");
errors += clblast::RunTests<clblast::TestXcopy<double>, double, double>(argc, argv, true, "DCOPY");
- errors += clblast::RunTests<clblast::TestXcopy<float2>, float2, float2>(argc, argv, true, "CCOPY");
- errors += clblast::RunTests<clblast::TestXcopy<double2>, double2, double2>(argc, argv, true, "ZCOPY");
+ errors += clblast::RunTests<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CCOPY");
+ errors += clblast::RunTests<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZCOPY");
errors += clblast::RunTests<clblast::TestXcopy<half>, half, half>(argc, argv, true, "HCOPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xdot.cpp b/test/correctness/routines/level1/xdot.cpp
index 080250cb..8dccbf26 100644
--- a/test/correctness/routines/level1/xdot.cpp
+++ b/test/correctness/routines/level1/xdot.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xdotc.cpp b/test/correctness/routines/level1/xdotc.cpp
index 2a7bbeca..59eedddc 100644
--- a/test/correctness/routines/level1/xdotc.cpp
+++ b/test/correctness/routines/level1/xdotc.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdotc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXdotc<float2>, float2, float2>(argc, argv, false, "CDOTC");
- errors += clblast::RunTests<clblast::TestXdotc<double2>, double2, double2>(argc, argv, true, "ZDOTC");
+ errors += clblast::RunTests<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTC");
+ errors += clblast::RunTests<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTC");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xdotu.cpp b/test/correctness/routines/level1/xdotu.cpp
index 1047d021..4392326d 100644
--- a/test/correctness/routines/level1/xdotu.cpp
+++ b/test/correctness/routines/level1/xdotu.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xdotu.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXdotu<float2>, float2, float2>(argc, argv, false, "CDOTU");
- errors += clblast::RunTests<clblast::TestXdotu<double2>, double2, double2>(argc, argv, true, "ZDOTU");
+ errors += clblast::RunTests<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CDOTU");
+ errors += clblast::RunTests<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZDOTU");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xnrm2.cpp b/test/correctness/routines/level1/xnrm2.cpp
index 142fa7ba..46ca1526 100644
--- a/test/correctness/routines/level1/xnrm2.cpp
+++ b/test/correctness/routines/level1/xnrm2.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xnrm2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXnrm2<float>, float, float>(argc, argv, false, "SNRM2");
errors += clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2");
- errors += clblast::RunTests<clblast::TestXnrm2<float2>, float2, float2>(argc, argv, true, "ScNRM2");
- errors += clblast::RunTests<clblast::TestXnrm2<double2>, double2, double2>(argc, argv, true, "DzNRM2");
+ errors += clblast::RunTests<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "ScNRM2");
+ errors += clblast::RunTests<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "DzNRM2");
errors += clblast::RunTests<clblast::TestXnrm2<half>, half, half>(argc, argv, true, "HNRM2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xrot.cpp b/test/correctness/routines/level1/xrot.cpp
index 5af358eb..d5eb6516 100644
--- a/test/correctness/routines/level1/xrot.cpp
+++ b/test/correctness/routines/level1/xrot.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotg.cpp b/test/correctness/routines/level1/xrotg.cpp
index ad23a554..ec544eab 100644
--- a/test/correctness/routines/level1/xrotg.cpp
+++ b/test/correctness/routines/level1/xrotg.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotm.cpp b/test/correctness/routines/level1/xrotm.cpp
index 4f7e8f15..7f2d7ce6 100644
--- a/test/correctness/routines/level1/xrotm.cpp
+++ b/test/correctness/routines/level1/xrotm.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xrotmg.cpp b/test/correctness/routines/level1/xrotmg.cpp
index ca89bc12..4ef6e67d 100644
--- a/test/correctness/routines/level1/xrotmg.cpp
+++ b/test/correctness/routines/level1/xrotmg.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xrotmg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level1/xscal.cpp b/test/correctness/routines/level1/xscal.cpp
index 939524be..c9788142 100644
--- a/test/correctness/routines/level1/xscal.cpp
+++ b/test/correctness/routines/level1/xscal.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xscal.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXscal<float>, float, float>(argc, argv, false, "SSCAL");
errors += clblast::RunTests<clblast::TestXscal<double>, double, double>(argc, argv, true, "DSCAL");
- errors += clblast::RunTests<clblast::TestXscal<float2>, float2, float2>(argc, argv, true, "CSCAL");
- errors += clblast::RunTests<clblast::TestXscal<double2>, double2, double2>(argc, argv, true, "ZSCAL");
+ errors += clblast::RunTests<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSCAL");
+ errors += clblast::RunTests<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSCAL");
errors += clblast::RunTests<clblast::TestXscal<half>, half, half>(argc, argv, true, "HSCAL");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level1/xswap.cpp b/test/correctness/routines/level1/xswap.cpp
index 446f3d65..ee694a08 100644
--- a/test/correctness/routines/level1/xswap.cpp
+++ b/test/correctness/routines/level1/xswap.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level1/xswap.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXswap<float>, float, float>(argc, argv, false, "SSWAP");
errors += clblast::RunTests<clblast::TestXswap<double>, double, double>(argc, argv, true, "DSWAP");
- errors += clblast::RunTests<clblast::TestXswap<float2>, float2, float2>(argc, argv, true, "CSWAP");
- errors += clblast::RunTests<clblast::TestXswap<double2>, double2, double2>(argc, argv, true, "ZSWAP");
+ errors += clblast::RunTests<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSWAP");
+ errors += clblast::RunTests<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSWAP");
errors += clblast::RunTests<clblast::TestXswap<half>, half, half>(argc, argv, true, "HSWAP");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgbmv.cpp b/test/correctness/routines/level2/xgbmv.cpp
index 8c49bc65..6aac283b 100644
--- a/test/correctness/routines/level2/xgbmv.cpp
+++ b/test/correctness/routines/level2/xgbmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgbmv<float>, float, float>(argc, argv, false, "SGBMV");
errors += clblast::RunTests<clblast::TestXgbmv<double>, double, double>(argc, argv, true, "DGBMV");
- errors += clblast::RunTests<clblast::TestXgbmv<float2>, float2, float2>(argc, argv, true, "CGBMV");
- errors += clblast::RunTests<clblast::TestXgbmv<double2>, double2, double2>(argc, argv, true, "ZGBMV");
+ errors += clblast::RunTests<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGBMV");
+ errors += clblast::RunTests<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGBMV");
errors += clblast::RunTests<clblast::TestXgbmv<half>, half, half>(argc, argv, true, "HGBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgemv.cpp b/test/correctness/routines/level2/xgemv.cpp
index 902ae777..66994b89 100644
--- a/test/correctness/routines/level2/xgemv.cpp
+++ b/test/correctness/routines/level2/xgemv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemv<float>, float, float>(argc, argv, false, "SGEMV");
errors += clblast::RunTests<clblast::TestXgemv<double>, double, double>(argc, argv, true, "DGEMV");
- errors += clblast::RunTests<clblast::TestXgemv<float2>, float2, float2>(argc, argv, true, "CGEMV");
- errors += clblast::RunTests<clblast::TestXgemv<double2>, double2, double2>(argc, argv, true, "ZGEMV");
+ errors += clblast::RunTests<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMV");
+ errors += clblast::RunTests<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMV");
errors += clblast::RunTests<clblast::TestXgemv<half>, half, half>(argc, argv, true, "HGEMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xger.cpp b/test/correctness/routines/level2/xger.cpp
index ce61bbcb..3b5d16e9 100644
--- a/test/correctness/routines/level2/xger.cpp
+++ b/test/correctness/routines/level2/xger.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xger.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xgerc.cpp b/test/correctness/routines/level2/xgerc.cpp
index b747f20d..42f6bb45 100644
--- a/test/correctness/routines/level2/xgerc.cpp
+++ b/test/correctness/routines/level2/xgerc.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgerc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXgerc<float2>, float2, float2>(argc, argv, false, "CGERC");
- errors += clblast::RunTests<clblast::TestXgerc<double2>, double2, double2>(argc, argv, true, "ZGERC");
+ errors += clblast::RunTests<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERC");
+ errors += clblast::RunTests<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERC");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xgeru.cpp b/test/correctness/routines/level2/xgeru.cpp
index f80c1e2b..f167eff5 100644
--- a/test/correctness/routines/level2/xgeru.cpp
+++ b/test/correctness/routines/level2/xgeru.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xgeru.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXgeru<float2>, float2, float2>(argc, argv, false, "CGERU");
- errors += clblast::RunTests<clblast::TestXgeru<double2>, double2, double2>(argc, argv, true, "ZGERU");
+ errors += clblast::RunTests<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CGERU");
+ errors += clblast::RunTests<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGERU");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhbmv.cpp b/test/correctness/routines/level2/xhbmv.cpp
index a4885c01..168d9474 100644
--- a/test/correctness/routines/level2/xhbmv.cpp
+++ b/test/correctness/routines/level2/xhbmv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhbmv<float2>, float2, float2>(argc, argv, false, "CHBMV");
- errors += clblast::RunTests<clblast::TestXhbmv<double2>, double2, double2>(argc, argv, true, "ZHBMV");
+ errors += clblast::RunTests<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHBMV");
+ errors += clblast::RunTests<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhemv.cpp b/test/correctness/routines/level2/xhemv.cpp
index 4318ffee..eabdf67d 100644
--- a/test/correctness/routines/level2/xhemv.cpp
+++ b/test/correctness/routines/level2/xhemv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhemv<float2>, float2, float2>(argc, argv, false, "CHEMV");
- errors += clblast::RunTests<clblast::TestXhemv<double2>, double2, double2>(argc, argv, true, "ZHEMV");
+ errors += clblast::RunTests<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMV");
+ errors += clblast::RunTests<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xher.cpp b/test/correctness/routines/level2/xher.cpp
index fe37bd76..a47a45ac 100644
--- a/test/correctness/routines/level2/xher.cpp
+++ b/test/correctness/routines/level2/xher.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xher.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher<float2,float>, float2, float>(argc, argv, false, "CHER");
- errors += clblast::RunTests<clblast::TestXher<double2,double>, double2, double>(argc, argv, true, "ZHER");
+ errors += clblast::RunTests<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER");
+ errors += clblast::RunTests<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xher2.cpp b/test/correctness/routines/level2/xher2.cpp
index 0b4af4d0..544ab16d 100644
--- a/test/correctness/routines/level2/xher2.cpp
+++ b/test/correctness/routines/level2/xher2.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xher2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher2<float2>, float2, float2>(argc, argv, false, "CHER2");
- errors += clblast::RunTests<clblast::TestXher2<double2>, double2, double2>(argc, argv, true, "ZHER2");
+ errors += clblast::RunTests<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHER2");
+ errors += clblast::RunTests<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHER2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpmv.cpp b/test/correctness/routines/level2/xhpmv.cpp
index dd77df71..30d23b8f 100644
--- a/test/correctness/routines/level2/xhpmv.cpp
+++ b/test/correctness/routines/level2/xhpmv.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpmv<float2>, float2, float2>(argc, argv, false, "CHPMV");
- errors += clblast::RunTests<clblast::TestXhpmv<double2>, double2, double2>(argc, argv, true, "ZHPMV");
+ errors += clblast::RunTests<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPMV");
+ errors += clblast::RunTests<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpr.cpp b/test/correctness/routines/level2/xhpr.cpp
index 5a3f615f..ed876857 100644
--- a/test/correctness/routines/level2/xhpr.cpp
+++ b/test/correctness/routines/level2/xhpr.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpr<float2,float>, float2, float>(argc, argv, false, "CHPR");
- errors += clblast::RunTests<clblast::TestXhpr<double2,double>, double2, double>(argc, argv, true, "ZHPR");
+ errors += clblast::RunTests<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHPR");
+ errors += clblast::RunTests<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHPR");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xhpr2.cpp b/test/correctness/routines/level2/xhpr2.cpp
index 8218b444..b3bd167a 100644
--- a/test/correctness/routines/level2/xhpr2.cpp
+++ b/test/correctness/routines/level2/xhpr2.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xhpr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhpr2<float2>, float2, float2>(argc, argv, false, "CHPR2");
- errors += clblast::RunTests<clblast::TestXhpr2<double2>, double2, double2>(argc, argv, true, "ZHPR2");
+ errors += clblast::RunTests<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHPR2");
+ errors += clblast::RunTests<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHPR2");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xsbmv.cpp b/test/correctness/routines/level2/xsbmv.cpp
index 7918cb21..3b6b3972 100644
--- a/test/correctness/routines/level2/xsbmv.cpp
+++ b/test/correctness/routines/level2/xsbmv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspmv.cpp b/test/correctness/routines/level2/xspmv.cpp
index 78210660..9dccdbc1 100644
--- a/test/correctness/routines/level2/xspmv.cpp
+++ b/test/correctness/routines/level2/xspmv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspr.cpp b/test/correctness/routines/level2/xspr.cpp
index d05adf34..9cf242c1 100644
--- a/test/correctness/routines/level2/xspr.cpp
+++ b/test/correctness/routines/level2/xspr.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xspr2.cpp b/test/correctness/routines/level2/xspr2.cpp
index caa46a09..2650bd03 100644
--- a/test/correctness/routines/level2/xspr2.cpp
+++ b/test/correctness/routines/level2/xspr2.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xspr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsymv.cpp b/test/correctness/routines/level2/xsymv.cpp
index 978a5f8a..3f0a8f8b 100644
--- a/test/correctness/routines/level2/xsymv.cpp
+++ b/test/correctness/routines/level2/xsymv.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsymv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsyr.cpp b/test/correctness/routines/level2/xsyr.cpp
index 244dbfb4..15ac1f14 100644
--- a/test/correctness/routines/level2/xsyr.cpp
+++ b/test/correctness/routines/level2/xsyr.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsyr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xsyr2.cpp b/test/correctness/routines/level2/xsyr2.cpp
index 422e67ad..74806219 100644
--- a/test/correctness/routines/level2/xsyr2.cpp
+++ b/test/correctness/routines/level2/xsyr2.cpp
@@ -12,10 +12,6 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xsyr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
diff --git a/test/correctness/routines/level2/xtbmv.cpp b/test/correctness/routines/level2/xtbmv.cpp
index 491708ec..667ae732 100644
--- a/test/correctness/routines/level2/xtbmv.cpp
+++ b/test/correctness/routines/level2/xtbmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtbmv<float>, float, float>(argc, argv, false, "STBMV");
errors += clblast::RunTests<clblast::TestXtbmv<double>, double, double>(argc, argv, true, "DTBMV");
- errors += clblast::RunTests<clblast::TestXtbmv<float2>, float2, float2>(argc, argv, true, "CTBMV");
- errors += clblast::RunTests<clblast::TestXtbmv<double2>, double2, double2>(argc, argv, true, "ZTBMV");
+ errors += clblast::RunTests<clblast::TestXtbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTBMV");
+ errors += clblast::RunTests<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBMV");
errors += clblast::RunTests<clblast::TestXtbmv<half>, half, half>(argc, argv, true, "HTBMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtbsv.cpp b/test/correctness/routines/level2/xtbsv.cpp
index 12b5dca5..5cfc6942 100644
--- a/test/correctness/routines/level2/xtbsv.cpp
+++ b/test/correctness/routines/level2/xtbsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtbsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtbsv<float>, float, float>(argc, argv, false, "STBSV");
errors += clblast::RunTests<clblast::TestXtbsv<double>, double, double>(argc, argv, true, "DTBSV");
- errors += clblast::RunTests<clblast::TestXtbsv<float2>, float2, float2>(argc, argv, true, "CTBSV");
- errors += clblast::RunTests<clblast::TestXtbsv<double2>, double2, double2>(argc, argv, true, "ZTBSV");
+ errors += clblast::RunTests<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTBSV");
+ errors += clblast::RunTests<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTBSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtpmv.cpp b/test/correctness/routines/level2/xtpmv.cpp
index b89f0adc..89056678 100644
--- a/test/correctness/routines/level2/xtpmv.cpp
+++ b/test/correctness/routines/level2/xtpmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtpmv<float>, float, float>(argc, argv, false, "STPMV");
errors += clblast::RunTests<clblast::TestXtpmv<double>, double, double>(argc, argv, true, "DTPMV");
- errors += clblast::RunTests<clblast::TestXtpmv<float2>, float2, float2>(argc, argv, true, "CTPMV");
- errors += clblast::RunTests<clblast::TestXtpmv<double2>, double2, double2>(argc, argv, true, "ZTPMV");
+ errors += clblast::RunTests<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPMV");
+ errors += clblast::RunTests<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPMV");
errors += clblast::RunTests<clblast::TestXtpmv<half>, half, half>(argc, argv, true, "HTPMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtpsv.cpp b/test/correctness/routines/level2/xtpsv.cpp
index 6e6e7c85..28c9fe39 100644
--- a/test/correctness/routines/level2/xtpsv.cpp
+++ b/test/correctness/routines/level2/xtpsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtpsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtpsv<float>, float, float>(argc, argv, false, "STPSV");
errors += clblast::RunTests<clblast::TestXtpsv<double>, double, double>(argc, argv, true, "DTPSV");
- errors += clblast::RunTests<clblast::TestXtpsv<float2>, float2, float2>(argc, argv, true, "CTPSV");
- errors += clblast::RunTests<clblast::TestXtpsv<double2>, double2, double2>(argc, argv, true, "ZTPSV");
+ errors += clblast::RunTests<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTPSV");
+ errors += clblast::RunTests<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTPSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtrmv.cpp b/test/correctness/routines/level2/xtrmv.cpp
index 819f5cad..b1a414af 100644
--- a/test/correctness/routines/level2/xtrmv.cpp
+++ b/test/correctness/routines/level2/xtrmv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtrmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrmv<float>, float, float>(argc, argv, false, "STRMV");
errors += clblast::RunTests<clblast::TestXtrmv<double>, double, double>(argc, argv, true, "DTRMV");
- errors += clblast::RunTests<clblast::TestXtrmv<float2>, float2, float2>(argc, argv, true, "CTRMV");
- errors += clblast::RunTests<clblast::TestXtrmv<double2>, double2, double2>(argc, argv, true, "ZTRMV");
+ errors += clblast::RunTests<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMV");
+ errors += clblast::RunTests<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMV");
errors += clblast::RunTests<clblast::TestXtrmv<half>, half, half>(argc, argv, true, "HTRMV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level2/xtrsv.cpp b/test/correctness/routines/level2/xtrsv.cpp
index 78e33807..b35d7fc7 100644
--- a/test/correctness/routines/level2/xtrsv.cpp
+++ b/test/correctness/routines/level2/xtrsv.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level2/xtrsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrsv<float>, float, float>(argc, argv, false, "STRSV");
errors += clblast::RunTests<clblast::TestXtrsv<double>, double, double>(argc, argv, true, "DTRSV");
- errors += clblast::RunTests<clblast::TestXtrsv<float2>, float2, float2>(argc, argv, true, "CTRSV");
- errors += clblast::RunTests<clblast::TestXtrsv<double2>, double2, double2>(argc, argv, true, "ZTRSV");
+ errors += clblast::RunTests<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSV");
+ errors += clblast::RunTests<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSV");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xgemm.cpp b/test/correctness/routines/level3/xgemm.cpp
index 54d41719..7fda5f2d 100644
--- a/test/correctness/routines/level3/xgemm.cpp
+++ b/test/correctness/routines/level3/xgemm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xgemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemm<float>, float, float>(argc, argv, false, "SGEMM");
errors += clblast::RunTests<clblast::TestXgemm<double>, double, double>(argc, argv, true, "DGEMM");
- errors += clblast::RunTests<clblast::TestXgemm<float2>, float2, float2>(argc, argv, true, "CGEMM");
- errors += clblast::RunTests<clblast::TestXgemm<double2>, double2, double2>(argc, argv, true, "ZGEMM");
+ errors += clblast::RunTests<clblast::TestXgemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMM");
+ errors += clblast::RunTests<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMM");
errors += clblast::RunTests<clblast::TestXgemm<half>, half, half>(argc, argv, true, "HGEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xhemm.cpp b/test/correctness/routines/level3/xhemm.cpp
index 76c970a7..cbd277e2 100644
--- a/test/correctness/routines/level3/xhemm.cpp
+++ b/test/correctness/routines/level3/xhemm.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xhemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXhemm<float2>, float2, float2>(argc, argv, false, "CHEMM");
- errors += clblast::RunTests<clblast::TestXhemm<double2>, double2, double2>(argc, argv, true, "ZHEMM");
+ errors += clblast::RunTests<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, false, "CHEMM");
+ errors += clblast::RunTests<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZHEMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xher2k.cpp b/test/correctness/routines/level3/xher2k.cpp
index c653265e..e21a429c 100644
--- a/test/correctness/routines/level3/xher2k.cpp
+++ b/test/correctness/routines/level3/xher2k.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xher2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXher2k<float2,float>, float2, float>(argc, argv, false, "CHER2K");
- errors += clblast::RunTests<clblast::TestXher2k<double2,double>, double2, double>(argc, argv, true, "ZHER2K");
+ errors += clblast::RunTests<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHER2K");
+ errors += clblast::RunTests<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHER2K");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xherk.cpp b/test/correctness/routines/level3/xherk.cpp
index 09ea9e4d..5665147e 100644
--- a/test/correctness/routines/level3/xherk.cpp
+++ b/test/correctness/routines/level3/xherk.cpp
@@ -12,15 +12,11 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xherk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
- errors += clblast::RunTests<clblast::TestXherk<float2,float>, float2, float>(argc, argv, false, "CHERK");
- errors += clblast::RunTests<clblast::TestXherk<double2,double>, double2, double>(argc, argv, true, "ZHERK");
+ errors += clblast::RunTests<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv, false, "CHERK");
+ errors += clblast::RunTests<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv, true, "ZHERK");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsymm.cpp b/test/correctness/routines/level3/xsymm.cpp
index 3cb3515a..3e745d24 100644
--- a/test/correctness/routines/level3/xsymm.cpp
+++ b/test/correctness/routines/level3/xsymm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsymm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsymm<float>, float, float>(argc, argv, false, "SSYMM");
errors += clblast::RunTests<clblast::TestXsymm<double>, double, double>(argc, argv, true, "DSYMM");
- errors += clblast::RunTests<clblast::TestXsymm<float2>, float2, float2>(argc, argv, true, "CSYMM");
- errors += clblast::RunTests<clblast::TestXsymm<double2>, double2, double2>(argc, argv, true, "ZSYMM");
+ errors += clblast::RunTests<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYMM");
+ errors += clblast::RunTests<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYMM");
errors += clblast::RunTests<clblast::TestXsymm<half>, half, half>(argc, argv, true, "HSYMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsyr2k.cpp b/test/correctness/routines/level3/xsyr2k.cpp
index 617af04d..b3027063 100644
--- a/test/correctness/routines/level3/xsyr2k.cpp
+++ b/test/correctness/routines/level3/xsyr2k.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsyr2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsyr2k<float>, float, float>(argc, argv, false, "SSYR2K");
errors += clblast::RunTests<clblast::TestXsyr2k<double>, double, double>(argc, argv, true, "DSYR2K");
- errors += clblast::RunTests<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv, true, "CSYR2K");
- errors += clblast::RunTests<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv, true, "ZSYR2K");
+ errors += clblast::RunTests<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYR2K");
+ errors += clblast::RunTests<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYR2K");
errors += clblast::RunTests<clblast::TestXsyr2k<half>, half, half>(argc, argv, true, "HSYR2K");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xsyrk.cpp b/test/correctness/routines/level3/xsyrk.cpp
index 2014b8d0..26c0db41 100644
--- a/test/correctness/routines/level3/xsyrk.cpp
+++ b/test/correctness/routines/level3/xsyrk.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xsyrk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXsyrk<float>, float, float>(argc, argv, false, "SSYRK");
errors += clblast::RunTests<clblast::TestXsyrk<double>, double, double>(argc, argv, true, "DSYRK");
- errors += clblast::RunTests<clblast::TestXsyrk<float2>, float2, float2>(argc, argv, true, "CSYRK");
- errors += clblast::RunTests<clblast::TestXsyrk<double2>, double2, double2>(argc, argv, true, "ZSYRK");
+ errors += clblast::RunTests<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CSYRK");
+ errors += clblast::RunTests<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZSYRK");
errors += clblast::RunTests<clblast::TestXsyrk<half>, half, half>(argc, argv, true, "HSYRK");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xtrmm.cpp b/test/correctness/routines/level3/xtrmm.cpp
index 32640d52..63d17ed5 100644
--- a/test/correctness/routines/level3/xtrmm.cpp
+++ b/test/correctness/routines/level3/xtrmm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xtrmm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrmm<float>, float, float>(argc, argv, false, "STRMM");
errors += clblast::RunTests<clblast::TestXtrmm<double>, double, double>(argc, argv, true, "DTRMM");
- errors += clblast::RunTests<clblast::TestXtrmm<float2>, float2, float2>(argc, argv, true, "CTRMM");
- errors += clblast::RunTests<clblast::TestXtrmm<double2>, double2, double2>(argc, argv, true, "ZTRMM");
+ errors += clblast::RunTests<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRMM");
+ errors += clblast::RunTests<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRMM");
errors += clblast::RunTests<clblast::TestXtrmm<half>, half, half>(argc, argv, true, "HTRMM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/level3/xtrsm.cpp b/test/correctness/routines/level3/xtrsm.cpp
index bc45a8bf..dcc20060 100644
--- a/test/correctness/routines/level3/xtrsm.cpp
+++ b/test/correctness/routines/level3/xtrsm.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/level3/xtrsm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXtrsm<float>, float, float>(argc, argv, false, "STRSM");
errors += clblast::RunTests<clblast::TestXtrsm<double>, double, double>(argc, argv, true, "DTRSM");
- errors += clblast::RunTests<clblast::TestXtrsm<float2>, float2, float2>(argc, argv, true, "CTRSM");
- errors += clblast::RunTests<clblast::TestXtrsm<double2>, double2, double2>(argc, argv, true, "ZTRSM");
+ errors += clblast::RunTests<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CTRSM");
+ errors += clblast::RunTests<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZTRSM");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xaxpybatched.cpp b/test/correctness/routines/levelx/xaxpybatched.cpp
index a106440f..3b906217 100644
--- a/test/correctness/routines/levelx/xaxpybatched.cpp
+++ b/test/correctness/routines/levelx/xaxpybatched.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xaxpybatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXaxpyBatched<float>, float, float>(argc, argv, false, "SAXPYBATCHED");
errors += clblast::RunTests<clblast::TestXaxpyBatched<double>, double, double>(argc, argv, true, "DAXPYBATCHED");
- errors += clblast::RunTests<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv, true, "CAXPYBATCHED");
- errors += clblast::RunTests<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv, true, "ZAXPYBATCHED");
+ errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CAXPYBATCHED");
+ errors += clblast::RunTests<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZAXPYBATCHED");
errors += clblast::RunTests<clblast::TestXaxpyBatched<half>, half, half>(argc, argv, true, "HAXPYBATCHED");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xgemmbatched.cpp b/test/correctness/routines/levelx/xgemmbatched.cpp
index 748e1bb7..1e931fd5 100644
--- a/test/correctness/routines/levelx/xgemmbatched.cpp
+++ b/test/correctness/routines/levelx/xgemmbatched.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xgemmbatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXgemmBatched<float>, float, float>(argc, argv, false, "SGEMMBATCHED");
errors += clblast::RunTests<clblast::TestXgemmBatched<double>, double, double>(argc, argv, true, "DGEMMBATCHED");
- errors += clblast::RunTests<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv, true, "CGEMMBATCHED");
- errors += clblast::RunTests<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv, true, "ZGEMMBATCHED");
+ errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "CGEMMBATCHED");
+ errors += clblast::RunTests<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZGEMMBATCHED");
errors += clblast::RunTests<clblast::TestXgemmBatched<half>, half, half>(argc, argv, true, "HGEMMBATCHED");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/routines/levelx/xomatcopy.cpp b/test/correctness/routines/levelx/xomatcopy.cpp
index e034bc18..f512432b 100644
--- a/test/correctness/routines/levelx/xomatcopy.cpp
+++ b/test/correctness/routines/levelx/xomatcopy.cpp
@@ -12,17 +12,13 @@
#include "test/correctness/testblas.hpp"
#include "test/routines/levelx/xomatcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
auto errors = size_t{0};
errors += clblast::RunTests<clblast::TestXomatcopy<float>, float, float>(argc, argv, false, "SOMATCOPY");
errors += clblast::RunTests<clblast::TestXomatcopy<double>, double, double>(argc, argv, true, "DOMATCOPY");
- errors += clblast::RunTests<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv, true, "COMATCOPY");
- errors += clblast::RunTests<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv, true, "ZOMATCOPY");
+ errors += clblast::RunTests<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv, true, "COMATCOPY");
+ errors += clblast::RunTests<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv, true, "ZOMATCOPY");
errors += clblast::RunTests<clblast::TestXomatcopy<half>, half, half>(argc, argv, true, "HOMATCOPY");
if (errors > 0) { return 1; } else { return 0; }
}
diff --git a/test/correctness/tester.cpp b/test/correctness/tester.cpp
index 40784fdb..d1f3cbb2 100644
--- a/test/correctness/tester.cpp
+++ b/test/correctness/tester.cpp
@@ -116,24 +116,44 @@ Tester<T,U>::Tester(const std::vector<std::string> &arguments, const bool silent
tests_failed_{0} {
options_ = options;
+ // Determines which reference is the default
+ #if defined(CLBLAST_REF_CBLAS)
+ auto default_cblas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CLBLAS)
+ auto default_clblas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CUBLAS)
+ auto default_cublas = 0;
+ #endif
+ #if defined(CLBLAST_REF_CBLAS)
+ default_cblas = 1;
+ #elif defined(CLBLAST_REF_CLBLAS)
+ default_clblas = 1;
+ #elif defined(CLBLAST_REF_CUBLAS)
+ default_cublas = 1;
+ #endif
+
// Determines which reference to test against
- #if defined(CLBLAST_REF_CLBLAS) && defined(CLBLAST_REF_CBLAS)
- compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 0);
- compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1);
- #elif CLBLAST_REF_CLBLAS
- compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, 1);
- compare_cblas_ = 0;
- #elif CLBLAST_REF_CBLAS
- compare_clblas_ = 0;
- compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, 1);
- #else
- compare_clblas_ = 0;
- compare_cblas_ = 0;
+ compare_clblas_ = 0;
+ compare_cblas_ = 0;
+ compare_cublas_ = 0;
+ #if defined(CLBLAST_REF_CBLAS)
+ compare_cblas_ = GetArgument(arguments, help_, kArgComparecblas, default_cblas);
+ #endif
+ #if defined(CLBLAST_REF_CLBLAS)
+ compare_clblas_ = GetArgument(arguments, help_, kArgCompareclblas, default_clblas);
+ #endif
+ #if defined(CLBLAST_REF_CUBLAS)
+ compare_cublas_ = GetArgument(arguments, help_, kArgComparecublas, default_cublas);
#endif
// Prints the help message (command-line arguments)
if (!silent) { fprintf(stdout, "\n* %s\n", help_.c_str()); }
+ // Support for cuBLAS not available yet
+ if (compare_cublas_) { throw std::runtime_error("Cannot test against cuBLAS; not implemented yet"); }
+
// Can only test against a single reference (not two, not zero)
if (compare_clblas_ && compare_cblas_) {
throw std::runtime_error("Cannot test against both clBLAS and CBLAS references; choose one using the -cblas and -clblas arguments");
diff --git a/test/correctness/tester.hpp b/test/correctness/tester.hpp
index f60be04b..8cfa702f 100644
--- a/test/correctness/tester.hpp
+++ b/test/correctness/tester.hpp
@@ -113,6 +113,7 @@ class Tester {
// Testing against reference implementations
int compare_cblas_;
int compare_clblas_;
+ int compare_cublas_;
private:
diff --git a/test/performance/client.cpp b/test/performance/client.cpp
index 48d6708e..dc98ffbd 100644
--- a/test/performance/client.cpp
+++ b/test/performance/client.cpp
@@ -30,13 +30,14 @@ template <typename T, typename U> const int Client<T,U>::kSeed = 42; // fixed se
template <typename T, typename U>
Client<T,U>::Client(const Routine run_routine,
const Reference1 run_reference1, const Reference2 run_reference2,
- const std::vector<std::string> &options,
+ const Reference3 run_reference3, const std::vector<std::string> &options,
const std::vector<std::string> &buffers_in,
const std::vector<std::string> &buffers_out,
const GetMetric get_flops, const GetMetric get_bytes):
run_routine_(run_routine),
run_reference1_(run_reference1),
run_reference2_(run_reference2),
+ run_reference3_(run_reference3),
options_(options),
buffers_in_(buffers_in),
buffers_out_(buffers_out),
@@ -119,6 +120,11 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
#else
args.compare_cblas = 0;
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ args.compare_cublas = GetArgument(command_line_args, help, kArgComparecublas, 1);
+ #else
+ args.compare_cublas = 0;
+ #endif
args.step = GetArgument(command_line_args, help, kArgStepSize, size_t{1});
args.num_steps = GetArgument(command_line_args, help, kArgNumSteps, size_t{0});
args.num_runs = GetArgument(command_line_args, help, kArgNumRuns, size_t{10});
@@ -133,24 +139,26 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const size_t le
// Comparison against a non-BLAS routine is not supported
if (level == 4) { // level-4 == level-X
- if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) {
if (!args.silent) {
- fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for this non-BLAS routine\n\n");
+ fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for this non-BLAS routine\n\n");
}
}
args.compare_clblas = 0;
args.compare_cblas = 0;
+ args.compare_cublas = 0;
}
- // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
+ // Comparison against other BLAS libraries is not supported in case of half-precision
if (args.precision == Precision::kHalf) {
- if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0 || args.compare_cublas != 0) {
if (!args.silent) {
- fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
+ fprintf(stdout, "* Disabling clBLAS/CBLAS/cuBLAS comparisons for half-precision\n\n");
}
}
args.compare_clblas = 0;
args.compare_cblas = 0;
+ args.compare_cublas = 0;
}
// Returns the arguments
@@ -174,6 +182,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
#ifdef CLBLAST_REF_CLBLAS
if (args.compare_clblas) { clblasSetup(); }
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ if (args.compare_cublas) { cublasSetup(args); }
+ #endif
// Iterates over all "num_step" values jumping by "step" each time
auto s = size_t{0};
@@ -232,6 +243,16 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
timings.push_back(std::pair<std::string, double>("CPU BLAS", ms_cblas));
}
+ if (args.compare_cublas) {
+ auto buffers_host = BuffersHost<T>();
+ auto buffers_cuda = BuffersCUDA<T>();
+ DeviceToHost(args, buffers, buffers_host, queue, buffers_in_);
+ HostToCUDA(args, buffers_cuda, buffers_host, buffers_in_);
+ auto ms_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
+ CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
+ HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
+ timings.push_back(std::pair<std::string, double>("cuBLAS", ms_cublas));
+ }
// Prints the performance of the tested libraries
PrintTableRow(args, timings);
@@ -251,6 +272,9 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
#ifdef CLBLAST_REF_CLBLAS
if (args.compare_clblas) { clblasTeardown(); }
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ if (args.compare_cublas) { cublasTeardown(args); }
+ #endif
}
// =================================================================================================
@@ -307,6 +331,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {
fprintf(stdout, " | <-- CLBlast -->");
if (args.compare_clblas) { fprintf(stdout, " | <-- clBLAS -->"); }
if (args.compare_cblas) { fprintf(stdout, " | <-- CPU BLAS -->"); }
+ if (args.compare_cublas) { fprintf(stdout, " | <-- cuBLAS -->"); }
fprintf(stdout, " |\n");
}
@@ -315,6 +340,7 @@ void Client<T,U>::PrintTableHeader(const Arguments<U>& args) {
fprintf(stdout, "%9s;%9s;%9s", "ms_1", "GFLOPS_1", "GBs_1");
if (args.compare_clblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_2", "GFLOPS_2", "GBs_2"); }
if (args.compare_cblas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_3", "GFLOPS_3", "GBs_3"); }
+ if (args.compare_cublas) { fprintf(stdout, ";%9s;%9s;%9s", "ms_4", "GFLOPS_4", "GBs_4"); }
fprintf(stdout, "\n");
}
diff --git a/test/performance/client.hpp b/test/performance/client.hpp
index 12fd113d..47a13017 100644
--- a/test/performance/client.hpp
+++ b/test/performance/client.hpp
@@ -31,6 +31,7 @@
#ifdef CLBLAST_REF_CLBLAS
#include <clBLAS.h>
#endif
+#include "test/wrapper_cuda.hpp"
#include "clblast.h"
namespace clblast {
@@ -46,12 +47,13 @@ class Client {
using Routine = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
using Reference1 = std::function<StatusCode(const Arguments<U>&, Buffers<T>&, Queue&)>;
using Reference2 = std::function<StatusCode(const Arguments<U>&, BuffersHost<T>&, Queue&)>;
+ using Reference3 = std::function<StatusCode(const Arguments<U>&, BuffersCUDA<T>&, Queue&)>;
using SetMetric = std::function<void(Arguments<U>&)>;
using GetMetric = std::function<size_t(const Arguments<U>&)>;
// The constructor
Client(const Routine run_routine, const Reference1 run_reference1, const Reference2 run_reference2,
- const std::vector<std::string> &options,
+ const Reference3 run_reference3, const std::vector<std::string> &options,
const std::vector<std::string> &buffers_in, const std::vector<std::string> &buffers_out,
const GetMetric get_flops, const GetMetric get_bytes);
@@ -84,6 +86,7 @@ class Client {
const Routine run_routine_;
const Reference1 run_reference1_;
const Reference2 run_reference2_;
+ const Reference3 run_reference3_;
const std::vector<std::string> options_;
const std::vector<std::string> buffers_in_;
const std::vector<std::string> buffers_out_;
@@ -118,9 +121,14 @@ void RunClient(int argc, char *argv[]) {
#else
auto reference2 = ReferenceNotAvailable<T,U,BuffersHost<T>>;
#endif
+ #ifdef CLBLAST_REF_CUBLAS
+ auto reference3 = C::RunReference3; // cuBLAS when available
+ #else
+ auto reference3 = ReferenceNotAvailable<T,U,BuffersCUDA<T>>;
+ #endif
// Creates a new client
- auto client = Client<T,U>(C::RunRoutine, reference1, reference2, C::GetOptions(),
+ auto client = Client<T,U>(C::RunRoutine, reference1, reference2, reference3, C::GetOptions(),
C::BuffersIn(), C::BuffersOut(), C::GetFlops, C::GetBytes);
// Simple command line argument parser with defaults
diff --git a/test/performance/routines/level1/xamax.cpp b/test/performance/routines/level1/xamax.cpp
index 5dc7b3d9..5cbef604 100644
--- a/test/performance/routines/level1/xamax.cpp
+++ b/test/performance/routines/level1/xamax.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xamax.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXamax<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXamax<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXamax<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXamax<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXamax<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xasum.cpp b/test/performance/routines/level1/xasum.cpp
index bf5b2fa9..7fccb678 100644
--- a/test/performance/routines/level1/xasum.cpp
+++ b/test/performance/routines/level1/xasum.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xasum.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXasum<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXasum<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXasum<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXasum<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXasum<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xaxpy.cpp b/test/performance/routines/level1/xaxpy.cpp
index faccc089..739408bb 100644
--- a/test/performance/routines/level1/xaxpy.cpp
+++ b/test/performance/routines/level1/xaxpy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xaxpy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXaxpy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXaxpy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXaxpy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xcopy.cpp b/test/performance/routines/level1/xcopy.cpp
index 8aa536af..902c394f 100644
--- a/test/performance/routines/level1/xcopy.cpp
+++ b/test/performance/routines/level1/xcopy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXcopy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXcopy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXcopy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xdot.cpp b/test/performance/routines/level1/xdot.cpp
index 9a570e1e..b2d4d969 100644
--- a/test/performance/routines/level1/xdot.cpp
+++ b/test/performance/routines/level1/xdot.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xdotc.cpp b/test/performance/routines/level1/xdotc.cpp
index 426b81ae..308bcdab 100644
--- a/test/performance/routines/level1/xdotc.cpp
+++ b/test/performance/routines/level1/xdotc.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdotc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXdotc<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXdotc<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xdotu.cpp b/test/performance/routines/level1/xdotu.cpp
index 4fbe167d..fc54a8a5 100644
--- a/test/performance/routines/level1/xdotu.cpp
+++ b/test/performance/routines/level1/xdotu.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xdotu.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXdotu<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotu<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXdotu<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXdotu<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xnrm2.cpp b/test/performance/routines/level1/xnrm2.cpp
index 6a1cdcc7..769335eb 100644
--- a/test/performance/routines/level1/xnrm2.cpp
+++ b/test/performance/routines/level1/xnrm2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xnrm2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXnrm2<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXnrm2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXnrm2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXnrm2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXnrm2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xrot.cpp b/test/performance/routines/level1/xrot.cpp
index 2b94ca39..f010e04a 100644
--- a/test/performance/routines/level1/xrot.cpp
+++ b/test/performance/routines/level1/xrot.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrot.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotg.cpp b/test/performance/routines/level1/xrotg.cpp
index ee6fc44b..4c8d33cf 100644
--- a/test/performance/routines/level1/xrotg.cpp
+++ b/test/performance/routines/level1/xrotg.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotm.cpp b/test/performance/routines/level1/xrotm.cpp
index e8d73311..bc2111b3 100644
--- a/test/performance/routines/level1/xrotm.cpp
+++ b/test/performance/routines/level1/xrotm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xrotmg.cpp b/test/performance/routines/level1/xrotmg.cpp
index a5266b14..fb568243 100644
--- a/test/performance/routines/level1/xrotmg.cpp
+++ b/test/performance/routines/level1/xrotmg.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xrotmg.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level1/xscal.cpp b/test/performance/routines/level1/xscal.cpp
index 6fefc5d0..b9db60cf 100644
--- a/test/performance/routines/level1/xscal.cpp
+++ b/test/performance/routines/level1/xscal.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xscal.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXscal<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXscal<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXscal<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXscal<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXscal<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level1/xswap.cpp b/test/performance/routines/level1/xswap.cpp
index b728b8f4..db40f6e4 100644
--- a/test/performance/routines/level1/xswap.cpp
+++ b/test/performance/routines/level1/xswap.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level1/xswap.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXswap<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXswap<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXswap<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXswap<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXswap<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgbmv.cpp b/test/performance/routines/level2/xgbmv.cpp
index 6a4b01f8..23a91503 100644
--- a/test/performance/routines/level2/xgbmv.cpp
+++ b/test/performance/routines/level2/xgbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgbmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgemv.cpp b/test/performance/routines/level2/xgemv.cpp
index 335d5ef1..3bb14b68 100644
--- a/test/performance/routines/level2/xgemv.cpp
+++ b/test/performance/routines/level2/xgemv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xger.cpp b/test/performance/routines/level2/xger.cpp
index 50fdb9e6..ca23b8f0 100644
--- a/test/performance/routines/level2/xger.cpp
+++ b/test/performance/routines/level2/xger.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xger.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xgerc.cpp b/test/performance/routines/level2/xgerc.cpp
index 67c72285..0423cdd5 100644
--- a/test/performance/routines/level2/xgerc.cpp
+++ b/test/performance/routines/level2/xgerc.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgerc.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgerc<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgerc<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgerc<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgerc<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xgeru.cpp b/test/performance/routines/level2/xgeru.cpp
index 6e845bb8..c0fbb2d5 100644
--- a/test/performance/routines/level2/xgeru.cpp
+++ b/test/performance/routines/level2/xgeru.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xgeru.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgeru<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgeru<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgeru<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgeru<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhbmv.cpp b/test/performance/routines/level2/xhbmv.cpp
index 600317c1..d59cba26 100644
--- a/test/performance/routines/level2/xhbmv.cpp
+++ b/test/performance/routines/level2/xhbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhemv.cpp b/test/performance/routines/level2/xhemv.cpp
index 7700cf7b..1664b6cd 100644
--- a/test/performance/routines/level2/xhemv.cpp
+++ b/test/performance/routines/level2/xhemv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhemv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhemv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhemv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xher.cpp b/test/performance/routines/level2/xher.cpp
index e7276aee..434f486c 100644
--- a/test/performance/routines/level2/xher.cpp
+++ b/test/performance/routines/level2/xher.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xher.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xher2.cpp b/test/performance/routines/level2/xher2.cpp
index b4c53206..cce40a9e 100644
--- a/test/performance/routines/level2/xher2.cpp
+++ b/test/performance/routines/level2/xher2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xher2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpmv.cpp b/test/performance/routines/level2/xhpmv.cpp
index d9683d2e..d88791fe 100644
--- a/test/performance/routines/level2/xhpmv.cpp
+++ b/test/performance/routines/level2/xhpmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpr.cpp b/test/performance/routines/level2/xhpr.cpp
index c4ffaf81..a92a3134 100644
--- a/test/performance/routines/level2/xhpr.cpp
+++ b/test/performance/routines/level2/xhpr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpr<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpr<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xhpr2.cpp b/test/performance/routines/level2/xhpr2.cpp
index 3e5d4004..f34de29b 100644
--- a/test/performance/routines/level2/xhpr2.cpp
+++ b/test/performance/routines/level2/xhpr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xhpr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhpr2<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr2<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhpr2<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhpr2<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xsbmv.cpp b/test/performance/routines/level2/xsbmv.cpp
index 9c0ab3b6..59bbf40c 100644
--- a/test/performance/routines/level2/xsbmv.cpp
+++ b/test/performance/routines/level2/xsbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspmv.cpp b/test/performance/routines/level2/xspmv.cpp
index 6cc4e3ba..9ba29f43 100644
--- a/test/performance/routines/level2/xspmv.cpp
+++ b/test/performance/routines/level2/xspmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspr.cpp b/test/performance/routines/level2/xspr.cpp
index dc45ba6d..57551f5d 100644
--- a/test/performance/routines/level2/xspr.cpp
+++ b/test/performance/routines/level2/xspr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xspr2.cpp b/test/performance/routines/level2/xspr2.cpp
index 3c9a769f..573fb652 100644
--- a/test/performance/routines/level2/xspr2.cpp
+++ b/test/performance/routines/level2/xspr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xspr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsymv.cpp b/test/performance/routines/level2/xsymv.cpp
index aaa98c8b..25933d8d 100644
--- a/test/performance/routines/level2/xsymv.cpp
+++ b/test/performance/routines/level2/xsymv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsymv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsyr.cpp b/test/performance/routines/level2/xsyr.cpp
index d710bf63..3b54510d 100644
--- a/test/performance/routines/level2/xsyr.cpp
+++ b/test/performance/routines/level2/xsyr.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsyr.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xsyr2.cpp b/test/performance/routines/level2/xsyr2.cpp
index 39b46b6a..ab9641c2 100644
--- a/test/performance/routines/level2/xsyr2.cpp
+++ b/test/performance/routines/level2/xsyr2.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xsyr2.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
diff --git a/test/performance/routines/level2/xtbmv.cpp b/test/performance/routines/level2/xtbmv.cpp
index 5fb3ea14..319f9c80 100644
--- a/test/performance/routines/level2/xtbmv.cpp
+++ b/test/performance/routines/level2/xtbmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtbmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtbmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtbmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtbmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtbsv.cpp b/test/performance/routines/level2/xtbsv.cpp
index 7b88917c..4d37e76d 100644
--- a/test/performance/routines/level2/xtbsv.cpp
+++ b/test/performance/routines/level2/xtbsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtbsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtbsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtbsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtbsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtbsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtpmv.cpp b/test/performance/routines/level2/xtpmv.cpp
index 907749a7..c2db51b1 100644
--- a/test/performance/routines/level2/xtpmv.cpp
+++ b/test/performance/routines/level2/xtpmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtpmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtpmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtpmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtpmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtpsv.cpp b/test/performance/routines/level2/xtpsv.cpp
index 0dab8ff6..b01a9f05 100644
--- a/test/performance/routines/level2/xtpsv.cpp
+++ b/test/performance/routines/level2/xtpsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtpsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtpsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtpsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtpsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtpsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtrmv.cpp b/test/performance/routines/level2/xtrmv.cpp
index c2c6f232..610a5052 100644
--- a/test/performance/routines/level2/xtrmv.cpp
+++ b/test/performance/routines/level2/xtrmv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtrmv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrmv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrmv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrmv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level2/xtrsv.cpp b/test/performance/routines/level2/xtrsv.cpp
index 49e477f7..02255e71 100644
--- a/test/performance/routines/level2/xtrsv.cpp
+++ b/test/performance/routines/level2/xtrsv.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level2/xtrsv.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrsv<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrsv<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsv<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrsv<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsv<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xgemm.cpp b/test/performance/routines/level3/xgemm.cpp
index deb2493f..602e1a20 100644
--- a/test/performance/routines/level3/xgemm.cpp
+++ b/test/performance/routines/level3/xgemm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xgemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xhemm.cpp b/test/performance/routines/level3/xhemm.cpp
index 975c672f..6c3687a9 100644
--- a/test/performance/routines/level3/xhemm.cpp
+++ b/test/performance/routines/level3/xhemm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xhemm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXhemm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXhemm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXhemm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xher2k.cpp b/test/performance/routines/level3/xher2k.cpp
index d579d4f9..9d3385f7 100644
--- a/test/performance/routines/level3/xher2k.cpp
+++ b/test/performance/routines/level3/xher2k.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xher2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXher2k<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2k<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXher2k<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXher2k<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xherk.cpp b/test/performance/routines/level3/xherk.cpp
index 94411e5a..ae6e774e 100644
--- a/test/performance/routines/level3/xherk.cpp
+++ b/test/performance/routines/level3/xherk.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xherk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -24,9 +20,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kSingle: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kDouble: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXherk<float2,float>, float2, float>(argc, argv); break;
+ clblast::RunClient<clblast::TestXherk<clblast::float2,float>, clblast::float2, float>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXherk<double2,double>, double2, double>(argc, argv); break;
+ clblast::RunClient<clblast::TestXherk<clblast::double2,double>, clblast::double2, double>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsymm.cpp b/test/performance/routines/level3/xsymm.cpp
index 04ae8eb0..ba3b6ab2 100644
--- a/test/performance/routines/level3/xsymm.cpp
+++ b/test/performance/routines/level3/xsymm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsymm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsymm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsymm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsymm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsymm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsymm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsyr2k.cpp b/test/performance/routines/level3/xsyr2k.cpp
index 7b8b6f4f..150a4192 100644
--- a/test/performance/routines/level3/xsyr2k.cpp
+++ b/test/performance/routines/level3/xsyr2k.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsyr2k.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsyr2k<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyr2k<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyr2k<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xsyrk.cpp b/test/performance/routines/level3/xsyrk.cpp
index ea0fc33b..00cef52b 100644
--- a/test/performance/routines/level3/xsyrk.cpp
+++ b/test/performance/routines/level3/xsyrk.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xsyrk.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXsyrk<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXsyrk<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyrk<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXsyrk<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXsyrk<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xtrmm.cpp b/test/performance/routines/level3/xtrmm.cpp
index 7a29e111..fb54a410 100644
--- a/test/performance/routines/level3/xtrmm.cpp
+++ b/test/performance/routines/level3/xtrmm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xtrmm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrmm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrmm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrmm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrmm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/level3/xtrsm.cpp b/test/performance/routines/level3/xtrsm.cpp
index 342274b7..f44265f2 100644
--- a/test/performance/routines/level3/xtrsm.cpp
+++ b/test/performance/routines/level3/xtrsm.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/level3/xtrsm.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -26,9 +22,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXtrsm<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXtrsm<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsm<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXtrsm<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXtrsm<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xaxpybatched.cpp b/test/performance/routines/levelx/xaxpybatched.cpp
index 6d3bcb51..7c09cd5b 100644
--- a/test/performance/routines/levelx/xaxpybatched.cpp
+++ b/test/performance/routines/levelx/xaxpybatched.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xaxpybatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXaxpyBatched<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXaxpyBatched<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpyBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXaxpyBatched<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXaxpyBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xgemmbatched.cpp b/test/performance/routines/levelx/xgemmbatched.cpp
index c9477fad..f4c860d8 100644
--- a/test/performance/routines/levelx/xgemmbatched.cpp
+++ b/test/performance/routines/levelx/xgemmbatched.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xgemmbatched.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXgemmBatched<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXgemmBatched<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemmBatched<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXgemmBatched<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXgemmBatched<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/performance/routines/levelx/xomatcopy.cpp b/test/performance/routines/levelx/xomatcopy.cpp
index 5821c3b8..568f22e6 100644
--- a/test/performance/routines/levelx/xomatcopy.cpp
+++ b/test/performance/routines/levelx/xomatcopy.cpp
@@ -12,10 +12,6 @@
#include "test/performance/client.hpp"
#include "test/routines/levelx/xomatcopy.hpp"
-// Shortcuts to the clblast namespace
-using float2 = clblast::float2;
-using double2 = clblast::double2;
-
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
const auto command_line_args = clblast::RetrieveCommandLineArguments(argc, argv);
@@ -27,9 +23,9 @@ int main(int argc, char *argv[]) {
case clblast::Precision::kDouble:
clblast::RunClient<clblast::TestXomatcopy<double>, double, double>(argc, argv); break;
case clblast::Precision::kComplexSingle:
- clblast::RunClient<clblast::TestXomatcopy<float2>, float2, float2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXomatcopy<clblast::float2>, clblast::float2, clblast::float2>(argc, argv); break;
case clblast::Precision::kComplexDouble:
- clblast::RunClient<clblast::TestXomatcopy<double2>, double2, double2>(argc, argv); break;
+ clblast::RunClient<clblast::TestXomatcopy<clblast::double2>, clblast::double2, clblast::double2>(argc, argv); break;
}
return 0;
}
diff --git a/test/routines/common.hpp b/test/routines/common.hpp
new file mode 100644
index 00000000..9708288a
--- /dev/null
+++ b/test/routines/common.hpp
@@ -0,0 +1,36 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file contains all the common includes for the clients and tests
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_ROUTINES_COMMON_H_
+#define CLBLAST_TEST_ROUTINES_COMMON_H_
+
+#include <vector>
+#include <string>
+
+#include "utilities/utilities.hpp"
+
+#ifdef CLBLAST_REF_CLBLAS
+ #include "test/wrapper_clblas.hpp"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "test/wrapper_cblas.hpp"
+#endif
+#include "test/wrapper_cuda.hpp"
+#ifdef CLBLAST_REF_CUBLAS
+ #include "test/wrapper_cublas.hpp"
+#endif
+
+// =================================================================================================
+
+// CLBLAST_TEST_ROUTINES_COMMON_H_
+#endif
diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp
index 2e844f2c..04bdaa3d 100644
--- a/test/routines/level1/xamax.hpp
+++ b/test/routines/level1/xamax.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAMAX_H_
#define CLBLAST_TEST_ROUTINES_XAMAX_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXamax {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXamax(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.imax_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp
index 8488bfc6..6add9c64 100644
--- a/test/routines/level1/xasum.hpp
+++ b/test/routines/level1/xasum.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XASUM_H_
#define CLBLAST_TEST_ROUTINES_XASUM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXasum {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXasum(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.asum_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index cc7d251a..17cae6ad 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAXPY_H_
#define CLBLAST_TEST_ROUTINES_XAXPY_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -112,6 +104,16 @@ class TestXaxpy {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXaxpy(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp
index 0dbf0f3d..7a5c99b8 100644
--- a/test/routines/level1/xcopy.hpp
+++ b/test/routines/level1/xcopy.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XCOPY_H_
#define CLBLAST_TEST_ROUTINES_XCOPY_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXcopy {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXcopy(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp
index bdf2e721..1ea25994 100644
--- a/test/routines/level1/xdot.hpp
+++ b/test/routines/level1/xdot.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOT_H_
#define CLBLAST_TEST_ROUTINES_XDOT_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdot {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdot(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp
index 2cc71b93..c800c1f5 100644
--- a/test/routines/level1/xdotc.hpp
+++ b/test/routines/level1/xdotc.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOTC_H_
#define CLBLAST_TEST_ROUTINES_XDOTC_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdotc {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdotc(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp
index 272e1e31..3545a3a6 100644
--- a/test/routines/level1/xdotu.hpp
+++ b/test/routines/level1/xdotu.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XDOTU_H_
#define CLBLAST_TEST_ROUTINES_XDOTU_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,17 @@ class TestXdotu {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXdotu(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp
index cb1ec683..1db70537 100644
--- a/test/routines/level1/xnrm2.hpp
+++ b/test/routines/level1/xnrm2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XNRM2_H_
#define CLBLAST_TEST_ROUTINES_XNRM2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXnrm2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXnrm2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.scalar, args.nrm2_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.scalar_size, static_cast<T>(0));
diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp
index 3e6b9a38..efa0988d 100644
--- a/test/routines/level1/xscal.hpp
+++ b/test/routines/level1/xscal.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSCAL_H_
#define CLBLAST_TEST_ROUTINES_XSCAL_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -105,6 +97,15 @@ class TestXscal {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXscal(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp
index d9b84dc4..d778cc23 100644
--- a/test/routines/level1/xswap.hpp
+++ b/test/routines/level1/xswap.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSWAP_H_
#define CLBLAST_TEST_ROUTINES_XSWAP_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -111,6 +103,16 @@ class TestXswap {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXswap(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size + args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp
index 990ef49f..23138c77 100644
--- a/test/routines/level2/xgbmv.hpp
+++ b/test/routines/level2/xgbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGBMV_H_
#define CLBLAST_TEST_ROUTINES_XGBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -131,6 +123,19 @@ class TestXgbmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ args.m, args.n, args.kl, args.ku, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp
index a007cb62..0ee53b80 100644
--- a/test/routines/level2/xgemv.hpp
+++ b/test/routines/level2/xgemv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMV_H_
#define CLBLAST_TEST_ROUTINES_XGEMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -131,6 +123,19 @@ class TestXgemv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgemv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp
index 5c131e2d..92a1a2ae 100644
--- a/test/routines/level2/xger.hpp
+++ b/test/routines/level2/xger.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGER_H_
#define CLBLAST_TEST_ROUTINES_XGER_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXger {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXger(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp
index e3544424..5d899398 100644
--- a/test/routines/level2/xgerc.hpp
+++ b/test/routines/level2/xgerc.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGERC_H_
#define CLBLAST_TEST_ROUTINES_XGERC_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXgerc {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgerc(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp
index 1d81e292..96dab22e 100644
--- a/test/routines/level2/xgeru.hpp
+++ b/test/routines/level2/xgeru.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGERU_H_
#define CLBLAST_TEST_ROUTINES_XGERU_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,18 @@ class TestXgeru {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgeru(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ args.m, args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp
index 21194fd6..b6844744 100644
--- a/test/routines/level2/xhbmv.hpp
+++ b/test/routines/level2/xhbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHBMV_H_
#define CLBLAST_TEST_ROUTINES_XHBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhbmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.kl, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp
index ffef8ff8..e1f23592 100644
--- a/test/routines/level2/xhemv.hpp
+++ b/test/routines/level2/xhemv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHEMV_H_
#define CLBLAST_TEST_ROUTINES_XHEMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhemv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhemv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp
index 083bd3fc..1ac1247b 100644
--- a/test/routines/level2/xher.hpp
+++ b/test/routines/level2/xher.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER_H_
#define CLBLAST_TEST_ROUTINES_XHER_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXher {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXher(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp
index 7bd890a5..18ccc1ac 100644
--- a/test/routines/level2/xher2.hpp
+++ b/test/routines/level2/xher2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER2_H_
#define CLBLAST_TEST_ROUTINES_XHER2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXher2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXher2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp
index 285dd6d3..ad91fe15 100644
--- a/test/routines/level2/xhpmv.hpp
+++ b/test/routines/level2/xhpmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPMV_H_
#define CLBLAST_TEST_ROUTINES_XHPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhpmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhpmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp
index 88bae86b..f9d580cd 100644
--- a/test/routines/level2/xhpr.hpp
+++ b/test/routines/level2/xhpr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPR_H_
#define CLBLAST_TEST_ROUTINES_XHPR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXhpr {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhpr(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp
index cd10fa00..f946ba5c 100644
--- a/test/routines/level2/xhpr2.hpp
+++ b/test/routines/level2/xhpr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHPR2_H_
#define CLBLAST_TEST_ROUTINES_XHPR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXhpr2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhpr2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp
index 5c70aba5..6481d19b 100644
--- a/test/routines/level2/xsbmv.hpp
+++ b/test/routines/level2/xsbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSBMV_H_
#define CLBLAST_TEST_ROUTINES_XSBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsbmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.kl, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp
index 560f5baa..9815dbee 100644
--- a/test/routines/level2/xspmv.hpp
+++ b/test/routines/level2/xspmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPMV_H_
#define CLBLAST_TEST_ROUTINES_XSPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXspmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXspmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp
index 2e12db33..01a50c38 100644
--- a/test/routines/level2/xspr.hpp
+++ b/test/routines/level2/xspr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPR_H_
#define CLBLAST_TEST_ROUTINES_XSPR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXspr {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXspr(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp
index a7e22227..55f8a141 100644
--- a/test/routines/level2/xspr2.hpp
+++ b/test/routines/level2/xspr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSPR2_H_
#define CLBLAST_TEST_ROUTINES_XSPR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXspr2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXspr2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.ap_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp
index d9cf9c1e..aec0dfb0 100644
--- a/test/routines/level2/xsymv.hpp
+++ b/test/routines/level2/xsymv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYMV_H_
#define CLBLAST_TEST_ROUTINES_XSYMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsymv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsymv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp
index b60c3a36..78b686d8 100644
--- a/test/routines/level2/xsyr.hpp
+++ b/test/routines/level2/xsyr.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR_H_
#define CLBLAST_TEST_ROUTINES_XSYR_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -118,6 +110,18 @@ class TestXsyr {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsyr(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp
index dd10a3d0..38aa4f43 100644
--- a/test/routines/level2/xsyr2.hpp
+++ b/test/routines/level2/xsyr2.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR2_H_
#define CLBLAST_TEST_ROUTINES_XSYR2_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -125,6 +117,19 @@ class TestXsyr2 {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsyr2(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ args.n, args.alpha,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.a_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp
index 7eb8ce9e..8c7aa381 100644
--- a/test/routines/level2/xtbmv.hpp
+++ b/test/routines/level2/xtbmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTBMV_H_
#define CLBLAST_TEST_ROUTINES_XTBMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtbmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtbmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.n, args.kl,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp
index 7f4842f0..3afab978 100644
--- a/test/routines/level2/xtpmv.hpp
+++ b/test/routines/level2/xtpmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTPMV_H_
#define CLBLAST_TEST_ROUTINES_XTPMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtpmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtpmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.n,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp
index cb7527ed..2b71f151 100644
--- a/test/routines/level2/xtrmv.hpp
+++ b/test/routines/level2/xtrmv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRMV_H_
#define CLBLAST_TEST_ROUTINES_XTRMV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -121,6 +113,20 @@ class TestXtrmv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtrmv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.n,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index 63d34758..85b50e85 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_
#define CLBLAST_TEST_ROUTINES_XTRSV_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -136,6 +128,20 @@ class TestXtrsv {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtrsv(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.n,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.x_size, static_cast<T>(0));
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index a33cbfec..7e0ead6d 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMM_H_
#define CLBLAST_TEST_ROUTINES_XGEMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXgemm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXgemm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.b_transpose),
+ args.m, args.n, args.k, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp
index 74029c7e..a89617b5 100644
--- a/test/routines/level3/xhemm.hpp
+++ b/test/routines/level3/xhemm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHEMM_H_
#define CLBLAST_TEST_ROUTINES_XHEMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXhemm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXhemm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.side),
+ convertToCUBLAS(args.triangle),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp
index ea13bbc1..55e6d894 100644
--- a/test/routines/level3/xher2k.hpp
+++ b/test/routines/level3/xher2k.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHER2K_H_
#define CLBLAST_TEST_ROUTINES_XHER2K_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -136,6 +128,21 @@ class TestXher2k {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto alpha2 = T{args.alpha, args.alpha};
+ auto status = cublasXher2k(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ args.n, args.k, alpha2,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index b1ce83e0..3e1e7e02 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XHERK_H_
#define CLBLAST_TEST_ROUTINES_XHERK_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -123,6 +115,19 @@ class TestXherk {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<U> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXherk(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ args.n, args.k, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp
index 6ab644b8..5d840d40 100644
--- a/test/routines/level3/xsymm.hpp
+++ b/test/routines/level3/xsymm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYMM_H_
#define CLBLAST_TEST_ROUTINES_XSYMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +127,20 @@ class TestXsymm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsymm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.side),
+ convertToCUBLAS(args.triangle),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp
index 1400c4e2..4a4a2f10 100644
--- a/test/routines/level3/xsyr2k.hpp
+++ b/test/routines/level3/xsyr2k.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYR2K_H_
#define CLBLAST_TEST_ROUTINES_XSYR2K_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -133,6 +125,20 @@ class TestXsyr2k {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsyr2k(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ args.n, args.k, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp
index 2df8d6b0..90e46727 100644
--- a/test/routines/level3/xsyrk.hpp
+++ b/test/routines/level3/xsyrk.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XSYRK_H_
#define CLBLAST_TEST_ROUTINES_XSYRK_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -123,6 +115,19 @@ class TestXsyrk {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXsyrk(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ args.n, args.k, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp
index 84adc6e0..acc00e01 100644
--- a/test/routines/level3/xtrmm.hpp
+++ b/test/routines/level3/xtrmm.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRMM_H_
#define CLBLAST_TEST_ROUTINES_XTRMM_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -127,6 +119,21 @@ class TestXtrmm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtrmm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.side),
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.b_size, static_cast<T>(0));
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index de5b307d..d63c9d79 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -16,18 +16,9 @@
#ifndef CLBLAST_TEST_ROUTINES_XTRSM_H_
#define CLBLAST_TEST_ROUTINES_XTRSM_H_
-#include <vector>
-#include <string>
-
+#include "test/routines/common.hpp"
#include "test/routines/level3/xtrsm_data.hpp"
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
-
namespace clblast {
// =================================================================================================
@@ -139,6 +130,21 @@ class TestXtrsm {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ auto status = cublasXtrsm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.side),
+ convertToCUBLAS(args.triangle),
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.diagonal),
+ args.m, args.n, args.alpha,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld);
+ if (status == CUBLAS_STATUS_SUCCESS) { return StatusCode::kSuccess; } else { return StatusCode::kUnknownError; }
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.b_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp
index 05141bbb..5385e86e 100644
--- a/test/routines/levelx/xaxpybatched.hpp
+++ b/test/routines/levelx/xaxpybatched.hpp
@@ -16,17 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_
#define CLBLAST_TEST_ROUTINES_XAXPYBATCHED_H_
-#include <vector>
-#include <string>
-
-#include "utilities/utilities.hpp"
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -135,6 +125,19 @@ class TestXaxpyBatched {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ for (auto batch = size_t{0}; batch < args.batch_count; ++batch) {
+ auto status = cublasXaxpy(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.n, args.alphas[batch],
+ buffers.x_vec, args.x_offsets[batch], args.x_inc,
+ buffers.y_vec, args.y_offsets[batch], args.y_inc);
+ if (status != CUBLAS_STATUS_SUCCESS) { return StatusCode::kUnknownError; }
+ }
+ return StatusCode::kSuccess;
+ }
+ #endif
+
// Describes how to download the results of the computation
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp
index ab5f20c5..ebfd8b19 100644
--- a/test/routines/levelx/xgemmbatched.hpp
+++ b/test/routines/levelx/xgemmbatched.hpp
@@ -16,15 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_
#define CLBLAST_TEST_ROUTINES_XGEMMBATCHED_H_
-#include <vector>
-#include <string>
-
-#ifdef CLBLAST_REF_CLBLAS
- #include "test/wrapper_clblas.hpp"
-#endif
-#ifdef CLBLAST_REF_CBLAS
- #include "test/wrapper_cblas.hpp"
-#endif
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -168,6 +160,23 @@ class TestXgemmBatched {
}
#endif
+ // Describes how to run the cuBLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CUBLAS
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ for (auto batch = size_t{0}; batch < args.batch_count; ++batch) {
+ auto status = cublasXgemm(reinterpret_cast<cublasHandle_t>(args.cublas_handle), args.layout,
+ convertToCUBLAS(args.a_transpose),
+ convertToCUBLAS(args.b_transpose),
+ args.m, args.n, args.k, args.alphas[batch],
+ buffers.a_mat, args.a_offsets[batch], args.a_ld,
+ buffers.b_mat, args.b_offsets[batch], args.b_ld, args.betas[batch],
+ buffers.c_mat, args.c_offsets[batch], args.c_ld);
+ if (status != CUBLAS_STATUS_SUCCESS) { return StatusCode::kUnknownError; }
+ }
+ return StatusCode::kSuccess;
+ }
+ #endif
+
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
std::vector<T> result(args.c_size, static_cast<T>(0));
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index ffb484b0..cc02a88b 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -16,10 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XINVERT_H_
#define CLBLAST_TEST_ROUTINES_XINVERT_H_
-#include <vector>
-#include <string>
-
-#include "utilities/utilities.hpp"
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -192,6 +189,9 @@ class TestXinvert {
static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
return RunReference(args, buffers_host);
}
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ return StatusCode::kUnknownError;
+ }
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp
index d5973b4c..bbf6006c 100644
--- a/test/routines/levelx/xomatcopy.hpp
+++ b/test/routines/levelx/xomatcopy.hpp
@@ -16,8 +16,7 @@
#ifndef CLBLAST_TEST_ROUTINES_XOMATCOPY_H_
#define CLBLAST_TEST_ROUTINES_XOMATCOPY_H_
-#include <vector>
-#include <string>
+#include "test/routines/common.hpp"
namespace clblast {
// =================================================================================================
@@ -151,6 +150,9 @@ class TestXomatcopy {
static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
return RunReference(args, buffers_host);
}
+ static StatusCode RunReference3(const Arguments<T> &args, BuffersCUDA<T> &buffers, Queue &) {
+ return StatusCode::kUnknownError;
+ }
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/wrapper_cblas.hpp b/test/wrapper_cblas.hpp
index dd610a6c..070d44b5 100644
--- a/test/wrapper_cblas.hpp
+++ b/test/wrapper_cblas.hpp
@@ -94,7 +94,7 @@ void cblasXrot(const size_t n,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
const float cos,
const float sin) {
- cblas_srot(n,
+ cblas_srot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
cos,
@@ -105,7 +105,7 @@ void cblasXrot(const size_t n,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
const double cos,
const double sin) {
- cblas_drot(n,
+ cblas_drot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
cos,
@@ -117,7 +117,7 @@ void cblasXrotm(const size_t n,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& sparam_buffer, const size_t sparam_offset) {
- cblas_srotm(n,
+ cblas_srotm(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
&sparam_buffer[sparam_offset]);
@@ -126,7 +126,7 @@ void cblasXrotm(const size_t n,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& sparam_buffer, const size_t sparam_offset) {
- cblas_drotm(n,
+ cblas_drotm(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
&sparam_buffer[sparam_offset]);
@@ -136,28 +136,28 @@ void cblasXrotm(const size_t n,
void cblasXswap(const size_t n,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_sswap(n,
+ cblas_sswap(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_dswap(n,
+ cblas_dswap(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cswap(n,
+ cblas_cswap(static_cast<int>(n),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
void cblasXswap(const size_t n,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zswap(n,
+ cblas_zswap(static_cast<int>(n),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
@@ -177,14 +177,14 @@ void cblasXswap(const size_t n,
void cblasXscal(const size_t n,
const float alpha,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- cblas_sscal(n,
+ cblas_sscal(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXscal(const size_t n,
const double alpha,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- cblas_dscal(n,
+ cblas_dscal(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -192,7 +192,7 @@ void cblasXscal(const size_t n,
const float2 alpha,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
- cblas_cscal(n,
+ cblas_cscal(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -200,7 +200,7 @@ void cblasXscal(const size_t n,
const double2 alpha,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
- cblas_zscal(n,
+ cblas_zscal(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -218,28 +218,28 @@ void cblasXscal(const size_t n,
void cblasXcopy(const size_t n,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_scopy(n,
+ cblas_scopy(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_dcopy(n,
+ cblas_dcopy(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_ccopy(n,
+ cblas_ccopy(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
void cblasXcopy(const size_t n,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zcopy(n,
+ cblas_zcopy(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
@@ -259,7 +259,7 @@ void cblasXaxpy(const size_t n,
const float alpha,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_saxpy(n,
+ cblas_saxpy(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
@@ -268,7 +268,7 @@ void cblasXaxpy(const size_t n,
const double alpha,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_daxpy(n,
+ cblas_daxpy(static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
@@ -278,7 +278,7 @@ void cblasXaxpy(const size_t n,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
- cblas_caxpy(n,
+ cblas_caxpy(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
@@ -288,7 +288,7 @@ void cblasXaxpy(const size_t n,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
- cblas_zaxpy(n,
+ cblas_zaxpy(static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
@@ -311,7 +311,7 @@ void cblasXdot(const size_t n,
std::vector<float>& dot_buffer, const size_t dot_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
- dot_buffer[dot_offset] = cblas_sdot(n,
+ dot_buffer[dot_offset] = cblas_sdot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
@@ -319,7 +319,7 @@ void cblasXdot(const size_t n,
std::vector<double>& dot_buffer, const size_t dot_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
- dot_buffer[dot_offset] = cblas_ddot(n,
+ dot_buffer[dot_offset] = cblas_ddot(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
@@ -342,7 +342,7 @@ void cblasXdotu(const size_t n,
std::vector<float2>& dot_buffer, const size_t dot_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cdotu_sub(n,
+ cblas_cdotu_sub(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
@@ -351,7 +351,7 @@ void cblasXdotu(const size_t n,
std::vector<double2>& dot_buffer, const size_t dot_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zdotu_sub(n,
+ cblas_zdotu_sub(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
@@ -362,7 +362,7 @@ void cblasXdotc(const size_t n,
std::vector<float2>& dot_buffer, const size_t dot_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<float2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_cdotc_sub(n,
+ cblas_cdotc_sub(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_float>(&dot_buffer[dot_offset]));
@@ -371,7 +371,7 @@ void cblasXdotc(const size_t n,
std::vector<double2>& dot_buffer, const size_t dot_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const std::vector<double2>& y_buffer, const size_t y_offset, const size_t y_inc) {
- cblas_zdotc_sub(n,
+ cblas_zdotc_sub(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
reinterpret_cast<return_pointer_double>(&dot_buffer[dot_offset]));
@@ -381,25 +381,25 @@ void cblasXdotc(const size_t n,
void cblasXnrm2(const size_t n,
std::vector<float>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset] = cblas_snrm2(n,
+ nrm2_buffer[nrm2_offset] = cblas_snrm2(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXnrm2(const size_t n,
std::vector<double>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset] = cblas_dnrm2(n,
+ nrm2_buffer[nrm2_offset] = cblas_dnrm2(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXnrm2(const size_t n,
std::vector<float2>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset].real(cblas_scnrm2(n,
+ nrm2_buffer[nrm2_offset].real(cblas_scnrm2(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXnrm2(const size_t n,
std::vector<double2>& nrm2_buffer, const size_t nrm2_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
+ nrm2_buffer[nrm2_offset].real(cblas_dznrm2(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXnrm2(const size_t n,
@@ -417,25 +417,25 @@ void cblasXnrm2(const size_t n,
void cblasXasum(const size_t n,
std::vector<float>& asum_buffer, const size_t asum_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset] = cblas_sasum(n,
+ asum_buffer[asum_offset] = cblas_sasum(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXasum(const size_t n,
std::vector<double>& asum_buffer, const size_t asum_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset] = cblas_dasum(n,
+ asum_buffer[asum_offset] = cblas_dasum(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXasum(const size_t n,
std::vector<float2>& asum_buffer, const size_t asum_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset].real(cblas_scasum(n,
+ asum_buffer[asum_offset].real(cblas_scasum(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXasum(const size_t n,
std::vector<double2>& asum_buffer, const size_t asum_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- asum_buffer[asum_offset].real(cblas_dzasum(n,
+ asum_buffer[asum_offset].real(cblas_dzasum(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
void cblasXasum(const size_t n,
@@ -453,25 +453,25 @@ void cblasXasum(const size_t n,
void cblasXamax(const size_t n,
std::vector<float>& imax_buffer, const size_t imax_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_isamax(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<double>& imax_buffer, const size_t imax_offset,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_idamax(static_cast<int>(n),
&x_buffer[x_offset], static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<float2>& imax_buffer, const size_t imax_offset,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_icamax(static_cast<int>(n),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
std::vector<double2>& imax_buffer, const size_t imax_offset,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
- ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
+ ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(static_cast<int>(n),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
void cblasXamax(const size_t n,
@@ -498,7 +498,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -513,7 +513,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -530,7 +530,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -547,7 +547,7 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgemv(layout, a_transpose,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -583,7 +583,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -598,7 +598,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -615,7 +615,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -632,7 +632,7 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgbmv(layout, a_transpose,
- m, n, kl, ku,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -670,7 +670,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chemv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -687,7 +687,7 @@ void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhemv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -706,7 +706,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -723,7 +723,7 @@ void cblasXhbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -742,7 +742,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chpmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -759,7 +759,7 @@ void cblasXhpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhpmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
@@ -776,7 +776,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_ssymv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -791,7 +791,7 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dsymv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -827,7 +827,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_ssbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -842,7 +842,7 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dsbmv(layout, triangle,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -878,7 +878,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const float beta,
std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_sspmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -893,7 +893,7 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const double beta,
std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc) {
cblas_dspmv(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc),
@@ -926,7 +926,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_strmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -935,7 +935,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -944,7 +944,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -953,7 +953,7 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztrmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -976,7 +976,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -985,7 +985,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -994,7 +994,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1003,7 +1003,7 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztbmv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1026,7 +1026,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& ap_buffer, const size_t ap_offset,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1035,7 +1035,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& ap_buffer, const size_t ap_offset,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1044,7 +1044,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& ap_buffer, const size_t ap_offset,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1053,7 +1053,7 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& ap_buffer, const size_t ap_offset,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztpmv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1076,7 +1076,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_strsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1085,7 +1085,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1094,7 +1094,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1103,7 +1103,7 @@ void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztrsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1114,7 +1114,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1123,7 +1123,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
&a_buffer[a_offset], a_ld,
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1132,7 +1132,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1141,7 +1141,7 @@ void cblasXtbsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztbsv(layout, triangle, a_transpose, diagonal,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1152,7 +1152,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float>& ap_buffer, const size_t ap_offset,
std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_stpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1161,7 +1161,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double>& ap_buffer, const size_t ap_offset,
std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_dtpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
&ap_buffer[ap_offset],
&x_buffer[x_offset], static_cast<int>(x_inc));
}
@@ -1170,7 +1170,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<float2>& ap_buffer, const size_t ap_offset,
std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ctpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const float*>(&ap_buffer[ap_offset]),
reinterpret_cast<float*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1179,7 +1179,7 @@ void cblasXtpsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const std::vector<double2>& ap_buffer, const size_t ap_offset,
std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) {
cblas_ztpsv(layout, triangle, a_transpose, diagonal,
- n,
+ static_cast<int>(n),
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
@@ -1192,7 +1192,7 @@ void cblasXger(const CBLAS_ORDER layout,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_sger(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1205,7 +1205,7 @@ void cblasXger(const CBLAS_ORDER layout,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dger(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1238,7 +1238,7 @@ void cblasXgeru(const CBLAS_ORDER layout,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cgeru(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1252,7 +1252,7 @@ void cblasXgeru(const CBLAS_ORDER layout,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zgeru(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1268,7 +1268,7 @@ void cblasXgerc(const CBLAS_ORDER layout,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cgerc(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1282,7 +1282,7 @@ void cblasXgerc(const CBLAS_ORDER layout,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zgerc(layout,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1296,7 +1296,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_cher(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&a_buffer[a_offset]), a_ld);
@@ -1307,7 +1307,7 @@ void cblasXher(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_zher(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&a_buffer[a_offset]), a_ld);
@@ -1320,7 +1320,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float2>& ap_buffer, const size_t ap_offset) {
cblas_chpr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<float*>(&ap_buffer[ap_offset]));
@@ -1331,7 +1331,7 @@ void cblasXhpr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double2>& ap_buffer, const size_t ap_offset) {
cblas_zhpr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&ap_buffer[ap_offset]));
@@ -1346,7 +1346,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<float2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cher2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1360,7 +1360,7 @@ void cblasXher2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<double2>& a_buffer, const size_t a_offset, const size_t a_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zher2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1376,7 +1376,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<float2>& ap_buffer, const size_t ap_offset) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_chpr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const float*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1390,7 +1390,7 @@ void cblasXhpr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
std::vector<double2>& ap_buffer, const size_t ap_offset) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zhpr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<const double*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
@@ -1404,7 +1404,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_ssyr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
@@ -1415,7 +1415,7 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dsyr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
@@ -1442,7 +1442,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<float>& ap_buffer, const size_t ap_offset) {
cblas_sspr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
@@ -1453,7 +1453,7 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& x_buffer, const size_t x_offset, const size_t x_inc,
std::vector<double>& ap_buffer, const size_t ap_offset) {
cblas_dspr(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
@@ -1481,7 +1481,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_ssyr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1494,7 +1494,7 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld) {
cblas_dsyr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1526,7 +1526,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<float>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<float>& ap_buffer, const size_t ap_offset) {
cblas_sspr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1539,7 +1539,7 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
const std::vector<double>& y_buffer, const size_t y_offset, const size_t y_inc,
std::vector<double>& ap_buffer, const size_t ap_offset) {
cblas_dspr2(layout, triangle,
- n,
+ static_cast<int>(n),
alpha,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc),
@@ -1576,7 +1576,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_sgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1591,7 +1591,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1608,7 +1608,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_cgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1625,7 +1625,7 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zgemm(layout, a_transpose, b_transpose,
- m, n, k,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1661,7 +1661,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1676,7 +1676,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1693,7 +1693,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1710,7 +1710,7 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsymm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1748,7 +1748,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_chemm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1765,7 +1765,7 @@ void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zhemm(layout, side, triangle,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1781,7 +1781,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
beta,
@@ -1794,7 +1794,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
beta,
@@ -1809,7 +1809,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
beta_array.data(),
@@ -1824,7 +1824,7 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsyrk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
beta_array.data(),
@@ -1855,7 +1855,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const float beta,
std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_cherk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
beta,
@@ -1868,7 +1868,7 @@ void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
const double beta,
std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_zherk(layout, triangle, a_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
beta,
@@ -1884,7 +1884,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const float beta,
std::vector<float>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_ssyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1899,7 +1899,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const double beta,
std::vector<double>& c_buffer, const size_t c_offset, const size_t c_ld) {
cblas_dsyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld,
@@ -1916,7 +1916,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<float>{beta.real(), beta.imag()};
cblas_csyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1933,7 +1933,7 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
const auto beta_array = std::vector<double>{beta.real(), beta.imag()};
cblas_zsyr2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -1970,7 +1970,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
std::vector<float2>& c_buffer, const size_t c_offset, const size_t c_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_cher2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const float*>(&b_buffer[b_offset]), b_ld,
@@ -1986,7 +1986,7 @@ void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
std::vector<double2>& c_buffer, const size_t c_offset, const size_t c_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_zher2k(layout, triangle, ab_transpose,
- n, k,
+ static_cast<int>(n), static_cast<int>(k),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<const double*>(&b_buffer[b_offset]), b_ld,
@@ -2001,7 +2001,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_strmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2012,7 +2012,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_dtrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2024,7 +2024,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_ctrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld);
@@ -2036,7 +2036,7 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_ztrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
@@ -2063,7 +2063,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<float>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<float>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_strsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2074,7 +2074,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
const std::vector<double>& a_buffer, const size_t a_offset, const size_t a_ld,
std::vector<double>& b_buffer, const size_t b_offset, const size_t b_ld) {
cblas_dtrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha,
&a_buffer[a_offset], a_ld,
&b_buffer[b_offset], b_ld);
@@ -2086,7 +2086,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<float2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<float>{alpha.real(), alpha.imag()};
cblas_ctrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const float*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<float*>(&b_buffer[b_offset]), b_ld);
@@ -2098,7 +2098,7 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
std::vector<double2>& b_buffer, const size_t b_offset, const size_t b_ld) {
const auto alpha_array = std::vector<double>{alpha.real(), alpha.imag()};
cblas_ztrsm(layout, side, triangle, a_transpose, diagonal,
- m, n,
+ static_cast<int>(m), static_cast<int>(n),
alpha_array.data(),
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
diff --git a/test/wrapper_cublas.hpp b/test/wrapper_cublas.hpp
new file mode 100644
index 00000000..35b1b9c6
--- /dev/null
+++ b/test/wrapper_cublas.hpp
@@ -0,0 +1,2548 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements a wrapper around the cuBLAS library, such that its routines can be called
+// in a similar way as the CLBlast routines: using alpha and beta to determine the precision.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_WRAPPER_CUBLAS_H_
+#define CLBLAST_TEST_WRAPPER_CUBLAS_H_
+
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+
+#include "utilities/utilities.hpp"
+
+namespace clblast {
+
+// Conversions from CLBlast types to cuBLAS types; a value not tested for maps to the last option
+cublasOperation_t convertToCUBLAS(const Transpose v) { return (v == Transpose::kNo) ? CUBLAS_OP_N : (v == Transpose::kYes) ? CUBLAS_OP_T : CUBLAS_OP_C; }
+cublasFillMode_t convertToCUBLAS(const Triangle v) { return (v == Triangle::kUpper) ? CUBLAS_FILL_MODE_UPPER : CUBLAS_FILL_MODE_LOWER; }
+cublasDiagType_t convertToCUBLAS(const Diagonal v) { return (v == Diagonal::kUnit) ? CUBLAS_DIAG_UNIT : CUBLAS_DIAG_NON_UNIT; }
+cublasSideMode_t convertToCUBLAS(const Side v) { return (v == Side::kLeft) ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; }
+
+// =================================================================================================
+// BLAS level-1 (vector-vector) routines
+// =================================================================================================
+
+// Forwards the cuBLAS calls for SROTG/DROTG, applying each offset to its buffer pointer first
+template <typename T>  // declaration only; the float and double specializations follow
+cublasStatus_t cublasXrotg(cublasHandle_t handle, T* sa_buffer, const size_t sa_offset,
+ T* sb_buffer, const size_t sb_offset,
+ T* sc_buffer, const size_t sc_offset,
+ T* ss_buffer, const size_t ss_offset);
+template <>
+cublasStatus_t cublasXrotg<float>(cublasHandle_t handle, float* sa_buffer, const size_t sa_offset,
+ float* sb_buffer, const size_t sb_offset,
+ float* sc_buffer, const size_t sc_offset,
+ float* ss_buffer, const size_t ss_offset) {
+ auto status = cublasSrotg(handle, &sa_buffer[sa_offset],
+ &sb_buffer[sb_offset],
+ &sc_buffer[sc_offset],
+ &ss_buffer[ss_offset]);
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSrotg
+}
+template <>
+cublasStatus_t cublasXrotg<double>(cublasHandle_t handle, double* sa_buffer, const size_t sa_offset,
+ double* sb_buffer, const size_t sb_offset,
+ double* sc_buffer, const size_t sc_offset,
+ double* ss_buffer, const size_t ss_offset) {
+ auto status = cublasDrotg(handle, &sa_buffer[sa_offset],
+ &sb_buffer[sb_offset],
+ &sc_buffer[sc_offset],
+ &ss_buffer[ss_offset]);
+ cudaDeviceSynchronize();
+ return status;  // status reported by cublasDrotg
+}
+
+// Forwards the cuBLAS calls for SROTMG/DROTMG, applying each offset to its buffer pointer first
+template <typename T>  // declaration only; the float and double specializations follow
+cublasStatus_t cublasXrotmg(cublasHandle_t handle, T* sd1_buffer, const size_t sd1_offset,
+ T* sd2_buffer, const size_t sd2_offset,
+ T* sx1_buffer, const size_t sx1_offset,
+ const T* sy1_buffer, const size_t sy1_offset,
+ T* sparam_buffer, const size_t sparam_offset);
+template <>
+cublasStatus_t cublasXrotmg<float>(cublasHandle_t handle, float* sd1_buffer, const size_t sd1_offset,
+ float* sd2_buffer, const size_t sd2_offset,
+ float* sx1_buffer, const size_t sx1_offset,
+ const float* sy1_buffer, const size_t sy1_offset,
+ float* sparam_buffer, const size_t sparam_offset) {
+ auto status = cublasSrotmg(handle, &sd1_buffer[sd1_offset],
+ &sd2_buffer[sd2_offset],
+ &sx1_buffer[sx1_offset],
+ &sy1_buffer[sy1_offset],
+ &sparam_buffer[sparam_offset]);
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSrotmg
+}
+template <>
+cublasStatus_t cublasXrotmg<double>(cublasHandle_t handle, double* sd1_buffer, const size_t sd1_offset,
+ double* sd2_buffer, const size_t sd2_offset,
+ double* sx1_buffer, const size_t sx1_offset,
+ const double* sy1_buffer, const size_t sy1_offset,
+ double* sparam_buffer, const size_t sparam_offset) {
+ auto status = cublasDrotmg(handle, &sd1_buffer[sd1_offset],
+ &sd2_buffer[sd2_offset],
+ &sx1_buffer[sx1_offset],
+ &sy1_buffer[sy1_offset],
+ &sparam_buffer[sparam_offset]);
+ cudaDeviceSynchronize();
+ return status;  // status reported by cublasDrotmg
+}
+
+// Forwards the cuBLAS calls for SROT/DROT; n and the increments are narrowed from size_t to int
+cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n,
+ float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* y_buffer, const size_t y_offset, const size_t y_inc,
+ const float cos,
+ const float sin) {
+ auto status = cublasSrot(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &cos,  // the by-value scalar is handed over by address
+ &sin);  // likewise passed by address
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSrot
+}
+cublasStatus_t cublasXrot(cublasHandle_t handle, const size_t n,
+ double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* y_buffer, const size_t y_offset, const size_t y_inc,
+ const double cos,
+ const double sin) {
+ auto status = cublasDrot(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &cos,
+ &sin);
+ cudaDeviceSynchronize();
+ return status;  // status reported by cublasDrot
+}
+
+// Forwards the cuBLAS calls for SROTM/DROTM; n and the increments are narrowed from size_t to int
+template <typename T>  // declaration only; the float and double specializations follow
+cublasStatus_t cublasXrotm(cublasHandle_t handle, const size_t n,
+ T* x_buffer, const size_t x_offset, const size_t x_inc,
+ T* y_buffer, const size_t y_offset, const size_t y_inc,
+ T* sparam_buffer, const size_t sparam_offset);
+template <>
+cublasStatus_t cublasXrotm<float>(cublasHandle_t handle, const size_t n,
+ float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* sparam_buffer, const size_t sparam_offset) {
+ auto status = cublasSrotm(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &sparam_buffer[sparam_offset]);
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSrotm
+}
+template <>
+cublasStatus_t cublasXrotm<double>(cublasHandle_t handle, const size_t n,
+ double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* sparam_buffer, const size_t sparam_offset) {
+ auto status = cublasDrotm(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &sparam_buffer[sparam_offset]);
+ cudaDeviceSynchronize();
+ return status;  // status reported by cublasDrotm
+}
+
+// Forwards the cuBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXswap(cublasHandle_t handle, const size_t n,
+ T* x_buffer, const size_t x_offset, const size_t x_inc,
+ T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXswap<float>(cublasHandle_t handle, const size_t n,
+ float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasSswap(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSswap
+}
+template <>
+cublasStatus_t cublasXswap<double>(cublasHandle_t handle, const size_t n,
+ double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasDswap(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // float2 buffers are handed to cuBLAS as cuComplex via reinterpret_cast
+cublasStatus_t cublasXswap<float2>(cublasHandle_t handle, const size_t n,
+ float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasCswap(handle, static_cast<int>(n),
+ reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // double2 buffers are handed to cuBLAS as cuDoubleComplex via reinterpret_cast
+cublasStatus_t cublasXswap<double2>(cublasHandle_t handle, const size_t n,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasZswap(handle, static_cast<int>(n),
+ reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXswap<half>(cublasHandle_t handle, const size_t n,
+ half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL (the half version is not supported)
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+ const float alpha,
+ float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasSscal(handle, static_cast<int>(n),
+ &alpha,  // the by-value scalar is handed over by address
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSscal
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+ const double alpha,
+ double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasDscal(handle, static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+ const float2 alpha,
+ float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ cuComplex alpha_cuda;  // alpha copied member-wise into the cuBLAS complex type
+ alpha_cuda.x = alpha.real();  // x carries the real part
+ alpha_cuda.y = alpha.imag();  // y carries the imaginary part
+ auto status = cublasCscal(handle, static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+ const double2 alpha,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ cuDoubleComplex alpha_cuda;  // alpha copied member-wise into the cuBLAS complex type
+ alpha_cuda.x = alpha.real();  // x carries the real part
+ alpha_cuda.y = alpha.imag();  // y carries the imaginary part
+ auto status = cublasZscal(handle, static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXscal(cublasHandle_t handle, const size_t n,
+ const half alpha,
+ half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // half: no cuBLAS call is made; arguments unused
+}
+
+// Forwards the cuBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXcopy(cublasHandle_t handle, const size_t n,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc,
+ T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXcopy<float>(cublasHandle_t handle, const size_t n,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasScopy(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasScopy
+}
+template <>
+cublasStatus_t cublasXcopy<double>(cublasHandle_t handle, const size_t n,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasDcopy(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // float2 buffers are handed to cuBLAS as cuComplex via reinterpret_cast
+cublasStatus_t cublasXcopy<float2>(cublasHandle_t handle, const size_t n,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasCcopy(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // double2 buffers are handed to cuBLAS as cuDoubleComplex via reinterpret_cast
+cublasStatus_t cublasXcopy<double2>(cublasHandle_t handle, const size_t n,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasZcopy(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXcopy<half>(cublasHandle_t handle, const size_t n,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY (the half version is not supported)
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasSaxpy(handle, static_cast<int>(n),
+ &alpha,  // the by-value scalar is handed over by address
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSaxpy
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasDaxpy(handle, static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ cuComplex alpha_cuda;  // alpha copied member-wise into the cuBLAS complex type
+ alpha_cuda.x = alpha.real();  // x carries the real part
+ alpha_cuda.y = alpha.imag();  // y carries the imaginary part
+ auto status = cublasCaxpy(handle, static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ cuDoubleComplex alpha_cuda;  // alpha copied member-wise into the cuBLAS complex type
+ alpha_cuda.x = alpha.real();  // x carries the real part
+ alpha_cuda.y = alpha.imag();  // y carries the imaginary part
+ auto status = cublasZaxpy(handle, static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+ cudaDeviceSynchronize();
+ return status;
+}
+cublasStatus_t cublasXaxpy(cublasHandle_t handle, const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;  // half: no cuBLAS call is made; arguments unused
+}
+
+// Forwards the cuBLAS calls for SDOT/DDOT (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXdot(cublasHandle_t handle, const size_t n,
+ T* dot_buffer, const size_t dot_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc,
+ const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>
+cublasStatus_t cublasXdot<float>(cublasHandle_t handle, const size_t n,
+ float* dot_buffer, const size_t dot_offset,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasSdot(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &dot_buffer[dot_offset]);  // passed last, although it comes first in this wrapper's signature
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSdot
+}
+template <>
+cublasStatus_t cublasXdot<double>(cublasHandle_t handle, const size_t n,
+ double* dot_buffer, const size_t dot_offset,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasDdot(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &dot_buffer[dot_offset]);
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXdot<half>(cublasHandle_t handle, const size_t n,
+ half* dot_buffer, const size_t dot_offset,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CDOTU/ZDOTU; only float2 and double2 are specialized here
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXdotu(cublasHandle_t handle, const size_t n,
+ T* dot_buffer, const size_t dot_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc,
+ const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>  // float2 buffers are handed to cuBLAS as cuComplex via reinterpret_cast
+cublasStatus_t cublasXdotu<float2>(cublasHandle_t handle, const size_t n,
+ float2* dot_buffer, const size_t dot_offset,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasCdotu(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuComplex*>(&dot_buffer[dot_offset]));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasCdotu
+}
+template <>  // double2 buffers are handed to cuBLAS as cuDoubleComplex via reinterpret_cast
+cublasStatus_t cublasXdotu<double2>(cublasHandle_t handle, const size_t n,
+ double2* dot_buffer, const size_t dot_offset,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasZdotu(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuDoubleComplex*>(&dot_buffer[dot_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for CDOTC/ZDOTC; only float2 and double2 are specialized here
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXdotc(cublasHandle_t handle, const size_t n,
+ T* dot_buffer, const size_t dot_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc,
+ const T* y_buffer, const size_t y_offset, const size_t y_inc);
+template <>  // float2 buffers are handed to cuBLAS as cuComplex via reinterpret_cast
+cublasStatus_t cublasXdotc<float2>(cublasHandle_t handle, const size_t n,
+ float2* dot_buffer, const size_t dot_offset,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasCdotc(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuComplex*>(&dot_buffer[dot_offset]));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasCdotc
+}
+template <>  // double2 buffers are handed to cuBLAS as cuDoubleComplex via reinterpret_cast
+cublasStatus_t cublasXdotc<double2>(cublasHandle_t handle, const size_t n,
+ double2* dot_buffer, const size_t dot_offset,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto status = cublasZdotc(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuDoubleComplex*>(&dot_buffer[dot_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2 (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXnrm2(cublasHandle_t handle, const size_t n,
+ T* nrm2_buffer, const size_t nrm2_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>
+cublasStatus_t cublasXnrm2<float>(cublasHandle_t handle, const size_t n,
+ float* nrm2_buffer, const size_t nrm2_offset,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasSnrm2(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &nrm2_buffer[nrm2_offset]);
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSnrm2
+}
+template <>
+cublasStatus_t cublasXnrm2<double>(cublasHandle_t handle, const size_t n,
+ double* nrm2_buffer, const size_t nrm2_offset,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasDnrm2(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &nrm2_buffer[nrm2_offset]);
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuComplex*; the float2 result slot is viewed as float*
+cublasStatus_t cublasXnrm2<float2>(cublasHandle_t handle, const size_t n,
+ float2* nrm2_buffer, const size_t nrm2_offset,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasScnrm2(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<float*>(&nrm2_buffer[nrm2_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuDoubleComplex*; the double2 result slot is viewed as double*
+cublasStatus_t cublasXnrm2<double2>(cublasHandle_t handle, const size_t n,
+ double2* nrm2_buffer, const size_t nrm2_offset,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasDznrm2(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<double*>(&nrm2_buffer[nrm2_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXnrm2<half>(cublasHandle_t handle, const size_t n,
+ half* nrm2_buffer, const size_t nrm2_offset,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SASUM/DASUM/ScASUM/DzASUM (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXasum(cublasHandle_t handle, const size_t n,
+ T* asum_buffer, const size_t asum_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>
+cublasStatus_t cublasXasum<float>(cublasHandle_t handle, const size_t n,
+ float* asum_buffer, const size_t asum_offset,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasSasum(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &asum_buffer[asum_offset]);
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasSasum
+}
+template <>
+cublasStatus_t cublasXasum<double>(cublasHandle_t handle, const size_t n,
+ double* asum_buffer, const size_t asum_offset,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasDasum(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &asum_buffer[asum_offset]);
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuComplex*; the float2 result slot is viewed as float*
+cublasStatus_t cublasXasum<float2>(cublasHandle_t handle, const size_t n,
+ float2* asum_buffer, const size_t asum_offset,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasScasum(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<float*>(&asum_buffer[asum_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuDoubleComplex*; the double2 result slot is viewed as double*
+cublasStatus_t cublasXasum<double2>(cublasHandle_t handle, const size_t n,
+ double2* asum_buffer, const size_t asum_offset,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasDzasum(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<double*>(&asum_buffer[asum_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXasum<half>(cublasHandle_t handle, const size_t n,
+ half* asum_buffer, const size_t asum_offset,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX (the half version is not supported)
+template <typename T>  // declaration only; the specializations follow
+cublasStatus_t cublasXamax(cublasHandle_t handle, const size_t n,
+ T* imax_buffer, const size_t imax_offset,
+ const T* x_buffer, const size_t x_offset, const size_t x_inc);
+template <>  // result argument: the float slot imax_buffer[imax_offset] viewed as int*
+cublasStatus_t cublasXamax<float>(cublasHandle_t handle, const size_t n,
+ float* imax_buffer, const size_t imax_offset,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasIsamax(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ reinterpret_cast<int*>(&imax_buffer[imax_offset]));
+ cudaDeviceSynchronize();  // wait for the device; this call's own return value is dropped
+ return status;  // status reported by cublasIsamax
+}
+template <>  // result argument: the double slot imax_buffer[imax_offset] viewed as int*
+cublasStatus_t cublasXamax<double>(cublasHandle_t handle, const size_t n,
+ double* imax_buffer, const size_t imax_offset,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasIdamax(handle, static_cast<int>(n),
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ reinterpret_cast<int*>(&imax_buffer[imax_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuComplex*; the float2 result slot is viewed as int*
+cublasStatus_t cublasXamax<float2>(cublasHandle_t handle, const size_t n,
+ float2* imax_buffer, const size_t imax_offset,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasIcamax(handle, static_cast<int>(n),
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<int*>(&imax_buffer[imax_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // x is passed as cuDoubleComplex*; the double2 result slot is viewed as int*
+cublasStatus_t cublasXamax<double2>(cublasHandle_t handle, const size_t n,
+ double2* imax_buffer, const size_t imax_offset,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto status = cublasIzamax(handle, static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<int*>(&imax_buffer[imax_offset]));
+ cudaDeviceSynchronize();
+ return status;
+}
+template <>  // half: no cuBLAS call is made; the arguments are unused
+cublasStatus_t cublasXamax<half>(cublasHandle_t handle, const size_t n,
+ half* imax_buffer, const size_t imax_offset,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// =================================================================================================
+// BLAS level-2 (matrix-vector) routines
+// =================================================================================================
+
+// Forwards the cuBLAS calls for SGEMV/DGEMV/CGEMV/ZGEMV
+// SGEMV wrapper: forwards to cublasSgemv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasSgemv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DGEMV wrapper: forwards to cublasDgemv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDgemv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CGEMV wrapper: copies alpha/beta into cuComplex values and forwards to cublasCgemv using the
+// buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns
+// the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasCgemv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZGEMV wrapper: copies alpha/beta into cuDoubleComplex values and forwards to cublasZgemv using
+// the buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED.
+// Returns the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZgemv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+cublasStatus_t cublasXgemv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+ const size_t m, const size_t n,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
+// SGBMV wrapper: forwards to cublasSgbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasSgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DGBMV wrapper: forwards to cublasDgbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CGBMV wrapper: copies alpha/beta into cuComplex values and forwards to cublasCgbmv using the
+// buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns
+// the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasCgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZGBMV wrapper: copies alpha/beta into cuDoubleComplex values and forwards to cublasZgbmv using
+// the buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED.
+// Returns the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+    const size_t m, const size_t n, const size_t kl, const size_t ku,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZgbmv(handle, a_transpose,
+      static_cast<int>(m), static_cast<int>(n), static_cast<int>(kl), static_cast<int>(ku),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+cublasStatus_t cublasXgbmv(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for CHEMV/ZHEMV
+// CHEMV wrapper: copies alpha/beta into cuComplex values and forwards to cublasChemv using the
+// buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns
+// the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasChemv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZHEMV wrapper: copies alpha/beta into cuDoubleComplex values and forwards to cublasZhemv using
+// the buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED.
+// Returns the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhemv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZhemv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+
+// Forwards the cuBLAS calls for CHBMV/ZHBMV
+// CHBMV wrapper: copies alpha/beta into cuComplex values and forwards to cublasChbmv using the
+// buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns
+// the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const float2 alpha,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasChbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZHBMV wrapper: copies alpha/beta into cuDoubleComplex values and forwards to cublasZhbmv using
+// the buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED.
+// Returns the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const double2 alpha,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZhbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+
+// Forwards the cuBLAS calls for CHPMV/ZHPMV
+// CHPMV wrapper: copies alpha/beta into cuComplex values and forwards to cublasChpmv using the
+// buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns
+// the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float2 alpha,
+    const float2* ap_buffer, const size_t ap_offset,
+    const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float2 beta,
+    float2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasChpmv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]),
+      reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZHPMV wrapper: copies alpha/beta into cuDoubleComplex values and forwards to cublasZhpmv using
+// the buffers advanced by their offsets. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED.
+// Returns the cuBLAS status, or CUBLAS_STATUS_EXECUTION_FAILED when the device synchronisation failed.
+cublasStatus_t cublasXhpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double2 alpha,
+    const double2* ap_buffer, const size_t ap_offset,
+    const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double2 beta,
+    double2* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  cuDoubleComplex alpha_cuda;
+  alpha_cuda.x = alpha.real();
+  alpha_cuda.y = alpha.imag();
+  cuDoubleComplex beta_cuda;
+  beta_cuda.x = beta.real();
+  beta_cuda.y = beta.imag();
+  const auto status = cublasZhpmv(handle, triangle,
+      static_cast<int>(n),
+      &alpha_cuda,
+      reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]),
+      reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+      &beta_cuda,
+      reinterpret_cast<cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+
+// Forwards the cuBLAS calls for SSYMV/DSYMV
+// SSYMV wrapper: forwards to cublasSsymv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasSsymv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DSYMV wrapper: forwards to cublasDsymv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDsymv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+cublasStatus_t cublasXsymv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for SSBMV/DSBMV
+// SSBMV wrapper: forwards to cublasSsbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const float alpha,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasSsbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DSBMV wrapper: forwards to cublasDsbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n, const size_t k,
+    const double alpha,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDsbmv(handle, triangle,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+cublasStatus_t cublasXsbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n, const size_t k,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for SSPMV/DSPMV
+// SSPMV wrapper: forwards to cublasSspmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const float alpha,
+    const float* ap_buffer, const size_t ap_offset,
+    const float* x_buffer, const size_t x_offset, const size_t x_inc,
+    const float beta,
+    float* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasSspmv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &ap_buffer[ap_offset],
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DSPMV wrapper: forwards to cublasDspmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+    const size_t n,
+    const double alpha,
+    const double* ap_buffer, const size_t ap_offset,
+    const double* x_buffer, const size_t x_offset, const size_t x_inc,
+    const double beta,
+    double* y_buffer, const size_t y_offset, const size_t y_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDspmv(handle, triangle,
+      static_cast<int>(n),
+      &alpha,
+      &ap_buffer[ap_offset],
+      &x_buffer[x_offset], static_cast<int>(x_inc),
+      &beta,
+      &y_buffer[y_offset], static_cast<int>(y_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+cublasStatus_t cublasXspmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* ap_buffer, const size_t ap_offset,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ half* y_buffer, const size_t y_offset, const size_t y_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
+template <typename T> // declaration only: the explicit specializations below (float, double, float2, double2, half) supply the bodies
+cublasStatus_t cublasXtrmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const T* a_buffer, const size_t a_offset, const size_t a_ld,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// STRMV wrapper: forwards to cublasStrmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasStrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DTRMV wrapper: forwards to cublasDtrmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CTRMV wrapper: forwards to cublasCtrmv with the buffers advanced by their offsets and viewed as
+// cuComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasCtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZTRMV wrapper: forwards to cublasZtrmv with the buffers advanced by their offsets and viewed as
+// cuDoubleComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status,
+// or CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZtrmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+template <>
+cublasStatus_t cublasXtrmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
+template <typename T> // declaration only: the explicit specializations below (float, double, float2, double2, half) supply the bodies
+cublasStatus_t cublasXtbmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const T* a_buffer, const size_t a_offset, const size_t a_ld,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// STBMV wrapper: forwards to cublasStbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtbmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasStbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DTBMV wrapper: forwards to cublasDtbmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtbmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CTBMV wrapper: forwards to cublasCtbmv with the buffers advanced by their offsets and viewed as
+// cuComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtbmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasCtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZTBMV wrapper: forwards to cublasZtbmv with the buffers advanced by their offsets and viewed as
+// cuDoubleComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status,
+// or CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtbmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n, const size_t k,
+    const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasZtbmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n), static_cast<int>(k),
+      reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+template <>
+cublasStatus_t cublasXtbmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
+template <typename T> // declaration only: the explicit specializations below (float, double, float2, double2, half) supply the bodies
+cublasStatus_t cublasXtpmv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const T* ap_buffer, const size_t ap_offset,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// STPMV wrapper: forwards to cublasStpmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtpmv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* ap_buffer, const size_t ap_offset,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasStpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &ap_buffer[ap_offset],
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DTPMV wrapper: forwards to cublasDtpmv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtpmv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* ap_buffer, const size_t ap_offset,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasDtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &ap_buffer[ap_offset],
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CTPMV wrapper: forwards to cublasCtpmv with the buffers advanced by their offsets and viewed as
+// cuComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtpmv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* ap_buffer, const size_t ap_offset,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasCtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuComplex*>(&ap_buffer[ap_offset]),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// ZTPMV wrapper: forwards to cublasZtpmv with the buffers advanced by their offsets and viewed as
+// cuDoubleComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status,
+// or CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtpmv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double2* ap_buffer, const size_t ap_offset,
+    double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  const auto status = cublasZtpmv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuDoubleComplex*>(&ap_buffer[ap_offset]),
+      reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+template <>
+cublasStatus_t cublasXtpmv<half>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const half* ap_buffer, const size_t ap_offset,
+ half* x_buffer, const size_t x_offset, const size_t x_inc) {
+ return CUBLAS_STATUS_NOT_SUPPORTED; // half-precision stub: nothing is forwarded, all arguments are unused
+}
+
+// Forwards the cuBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
+template <typename T> // declaration only: the explicit specializations below (float, double, float2, double2) supply the bodies
+cublasStatus_t cublasXtrsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const T* a_buffer, const size_t a_offset, const size_t a_ld,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// STRSV wrapper: forwards to cublasStrsv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float* a_buffer, const size_t a_offset, const size_t a_ld,
+    float* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasStrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// DTRSV wrapper: forwards to cublasDtrsv using the buffers advanced by their offsets. Row-major
+// layouts are not forwarded and yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const double* a_buffer, const size_t a_offset, const size_t a_ld,
+    double* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasDtrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      &a_buffer[a_offset], static_cast<int>(a_ld),
+      &x_buffer[x_offset], static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// CTRSV wrapper: forwards to cublasCtrsv with the buffers advanced by their offsets and viewed as
+// cuComplex. Row-major layouts yield CUBLAS_STATUS_NOT_SUPPORTED. Returns the cuBLAS status, or
+// CUBLAS_STATUS_EXECUTION_FAILED when the call was accepted but the device synchronisation failed.
+template <>
+cublasStatus_t cublasXtrsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+    const size_t n,
+    const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+    float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+  // a_ld is narrowed explicitly, consistent with the other size arguments
+  const auto status = cublasCtrsv(handle, triangle, a_transpose, diagonal,
+      static_cast<int>(n),
+      reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+      reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+  // Surface synchronisation failures instead of silently discarding them
+  const auto sync_status = cudaDeviceSynchronize();
+  if (status == CUBLAS_STATUS_SUCCESS && sync_status != cudaSuccess) { return CUBLAS_STATUS_EXECUTION_FAILED; }
+  return status;
+}
+// Double-precision complex specialization; forwards to cublasZtrsv.
+// The double2 buffers are advanced by a_offset/x_offset elements and reinterpreted as
+// cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtrsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZtrsv(handle, triangle, a_transpose, diagonal,
+ static_cast<int>(n),
+ reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for STBSV/DTBSV/CTBSV/ZTBSV
+// Primary template (declaration only); it is defined through the explicit specializations for
+// float, double, float2 and double2 below. Each specialization returns
+// CUBLAS_STATUS_NOT_SUPPORTED for Layout::kRowMajor, otherwise it forwards to the matching
+// cublas?tbsv routine with the buffers advanced by a_offset/x_offset elements.
+template <typename T>
+cublasStatus_t cublasXtbsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const T* a_buffer, const size_t a_offset, const size_t a_ld,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// Single-precision specialization; forwards to cublasStbsv.
+// a_offset/x_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtbsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasStbsv(handle, triangle, a_transpose, diagonal,
+ static_cast<int>(n), static_cast<int>(k),
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision specialization; forwards to cublasDtbsv.
+// a_offset/x_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtbsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const double* a_buffer, const size_t a_offset, const size_t a_ld,
+ double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasDtbsv(handle, triangle, a_transpose, diagonal,
+ static_cast<int>(n), static_cast<int>(k),
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &x_buffer[x_offset], static_cast<int>(x_inc));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Single-precision complex specialization; forwards to cublasCtbsv.
+// The float2 buffers are advanced by a_offset/x_offset elements and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtbsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasCtbsv(handle, triangle, a_transpose, diagonal,
+ static_cast<int>(n), static_cast<int>(k),
+ reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex specialization; forwards to cublasZtbsv.
+// The double2 buffers are advanced by a_offset/x_offset elements and reinterpreted as
+// cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtbsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n, const size_t k,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZtbsv(handle, triangle, a_transpose, diagonal,
+ static_cast<int>(n), static_cast<int>(k),
+ reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for STPSV/DTPSV/CTPSV/ZTPSV
+// Primary template (declaration only); it is defined through the explicit specializations for
+// float, double, float2 and double2 below. Each specialization returns
+// CUBLAS_STATUS_NOT_SUPPORTED for Layout::kRowMajor, otherwise it forwards to the matching
+// cublas?tpsv routine with the buffers advanced by ap_offset/x_offset elements.
+template <typename T>
+cublasStatus_t cublasXtpsv(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const T* ap_buffer, const size_t ap_offset,
+ T* x_buffer, const size_t x_offset, const size_t x_inc);
+// Single-precision specialization; forwards to cublasStpsv.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<float>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const float* ap_buffer, const size_t ap_offset,
+ float* x_buffer, const size_t x_offset, const size_t x_inc) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // Advance both buffers to their element offsets.
+ const float* ap_start = ap_buffer + ap_offset;
+ float* x_start = x_buffer + x_offset;
+ const auto result = cublasStpsv(handle, triangle, a_transpose, diagonal,
+ n_int, ap_start, x_start, x_inc_int);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision specialization; forwards to cublasDtpsv.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<double>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const double* ap_buffer, const size_t ap_offset,
+ double* x_buffer, const size_t x_offset, const size_t x_inc) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // Advance both buffers to their element offsets.
+ const double* ap_start = ap_buffer + ap_offset;
+ double* x_start = x_buffer + x_offset;
+ const auto result = cublasDtpsv(handle, triangle, a_transpose, diagonal,
+ n_int, ap_start, x_start, x_inc_int);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Single-precision complex specialization; forwards to cublasCtpsv.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<float2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const float2* ap_buffer, const size_t ap_offset,
+ float2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // The float2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* ap_start = reinterpret_cast<const cuComplex*>(ap_buffer + ap_offset);
+ auto* x_start = reinterpret_cast<cuComplex*>(x_buffer + x_offset);
+ const auto result = cublasCtpsv(handle, triangle, a_transpose, diagonal,
+ n_int, ap_start, x_start, x_inc_int);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision complex specialization; forwards to cublasZtpsv.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+template <>
+cublasStatus_t cublasXtpsv<double2>(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle, const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+ const size_t n,
+ const double2* ap_buffer, const size_t ap_offset,
+ double2* x_buffer, const size_t x_offset, const size_t x_inc) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // The double2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* ap_start = reinterpret_cast<const cuDoubleComplex*>(ap_buffer + ap_offset);
+ auto* x_start = reinterpret_cast<cuDoubleComplex*>(x_buffer + x_offset);
+ const auto result = cublasZtpsv(handle, triangle, a_transpose, diagonal,
+ n_int, ap_start, x_start, x_inc_int);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+
+// Forwards the cuBLAS calls for SGER/DGER
+// Single-precision overload; forwards to cublasSger.
+// x_offset/y_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasSger(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision overload; forwards to cublasDger.
+// x_offset/y_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasDger(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXger(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CGERU/ZGERU
+// Single-precision complex overload; forwards to cublasCgeru.
+// The float2 buffers are advanced by their element offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasCgeru(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex overload; forwards to cublasZgeru.
+// The double2 buffers are advanced by their element offsets and reinterpreted as cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgeru(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuDoubleComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZgeru(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for CGERC/ZGERC
+// Single-precision complex overload; forwards to cublasCgerc.
+// The float2 buffers are advanced by their element offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasCgerc(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex overload; forwards to cublasZgerc.
+// The double2 buffers are advanced by their element offsets and reinterpreted as cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgerc(cublasHandle_t handle, const Layout layout,
+ const size_t m, const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuDoubleComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZgerc(handle, static_cast<int>(m), static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for CHER/ZHER
+// Single-precision complex overload; forwards to cublasCher.
+// alpha is a real scalar passed by address; the float2 buffers are advanced by their element
+// offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasCher(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex overload; forwards to cublasZher.
+// alpha is a real scalar passed by address; the double2 buffers are advanced by their element
+// offsets and reinterpreted as cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXher(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZher(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for CHPR/ZHPR
+// Single-precision complex overload; forwards to cublasChpr with a real-valued alpha.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ float2* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // The float2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+ auto* ap_start = reinterpret_cast<cuComplex*>(ap_buffer + ap_offset);
+ const auto result = cublasChpr(handle, triangle, n_int, &alpha, x_start, x_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision complex overload; forwards to cublasZhpr with a real-valued alpha.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXhpr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ double2* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // The double2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+ auto* ap_start = reinterpret_cast<cuDoubleComplex*>(ap_buffer + ap_offset);
+ const auto result = cublasZhpr(handle, triangle, n_int, &alpha, x_start, x_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+
+// Forwards the cuBLAS calls for CHER2/ZHER2
+// Single-precision complex overload; forwards to cublasCher2.
+// The float2 buffers are advanced by their element offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasCher2(handle, triangle,
+ static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex overload; forwards to cublasZher2.
+// The double2 buffers are advanced by their element offsets and reinterpreted as cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXher2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuDoubleComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasZher2(handle, triangle,
+ static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuDoubleComplex*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
+ reinterpret_cast<const cuDoubleComplex*>(&y_buffer[y_offset]), static_cast<int>(y_inc),
+ reinterpret_cast<cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+
+// Forwards the cuBLAS calls for CHPR2/ZHPR2
+// Single-precision complex overload; forwards to cublasChpr2.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float2 alpha,
+ const float2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float2* y_buffer, const size_t y_offset, const size_t y_inc,
+ float2* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuComplex scalar;
+ scalar.x = alpha.real();
+ scalar.y = alpha.imag();
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ const auto y_inc_int = static_cast<int>(y_inc);
+ // The float2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* x_start = reinterpret_cast<const cuComplex*>(x_buffer + x_offset);
+ const auto* y_start = reinterpret_cast<const cuComplex*>(y_buffer + y_offset);
+ auto* ap_start = reinterpret_cast<cuComplex*>(ap_buffer + ap_offset);
+ const auto result = cublasChpr2(handle, triangle, n_int, &scalar,
+ x_start, x_inc_int, y_start, y_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision complex overload; forwards to cublasZhpr2.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXhpr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double2 alpha,
+ const double2* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double2* y_buffer, const size_t y_offset, const size_t y_inc,
+ double2* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy the scalar component-wise into the complex type cuBLAS expects.
+ cuDoubleComplex scalar;
+ scalar.x = alpha.real();
+ scalar.y = alpha.imag();
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ const auto y_inc_int = static_cast<int>(y_inc);
+ // The double2 buffers are offset and then reinterpreted as the cuBLAS complex type.
+ const auto* x_start = reinterpret_cast<const cuDoubleComplex*>(x_buffer + x_offset);
+ const auto* y_start = reinterpret_cast<const cuDoubleComplex*>(y_buffer + y_offset);
+ auto* ap_start = reinterpret_cast<cuDoubleComplex*>(ap_buffer + ap_offset);
+ const auto result = cublasZhpr2(handle, triangle, n_int, &scalar,
+ x_start, x_inc_int, y_start, y_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+
+// Forwards the cuBLAS calls for SSYR/DSYR
+// Single-precision overload; forwards to cublasSsyr.
+// x_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasSsyr(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision overload; forwards to cublasDsyr.
+// x_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasDsyr(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXsyr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSPR/DSPR
+// Single-precision overload; forwards to cublasSspr.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ float* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // Advance both buffers to their element offsets.
+ const float* x_start = x_buffer + x_offset;
+ float* ap_start = ap_buffer + ap_offset;
+ const auto result = cublasSspr(handle, triangle, n_int, &alpha, x_start, x_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision overload; forwards to cublasDspr.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ double* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ // Advance both buffers to their element offsets.
+ const double* x_start = x_buffer + x_offset;
+ double* ap_start = ap_buffer + ap_offset;
+ const auto result = cublasDspr(handle, triangle, n_int, &alpha, x_start, x_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXspr(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ half* ap_buffer, const size_t ap_offset) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSYR2/DSYR2
+// Single-precision overload; forwards to cublasSsyr2.
+// x_offset/y_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasSsyr2(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision overload; forwards to cublasDsyr2.
+// x_offset/y_offset/a_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* a_buffer, const size_t a_offset, const size_t a_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimension, is narrowed to int explicitly.
+ auto status = cublasDsyr2(handle, triangle,
+ static_cast<int>(n),
+ &alpha,
+ &x_buffer[x_offset], static_cast<int>(x_inc),
+ &y_buffer[y_offset], static_cast<int>(y_inc),
+ &a_buffer[a_offset], static_cast<int>(a_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXsyr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* a_buffer, const size_t a_offset, const size_t a_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSPR2/DSPR2
+// Single-precision overload; forwards to cublasSspr2.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const float alpha,
+ const float* x_buffer, const size_t x_offset, const size_t x_inc,
+ const float* y_buffer, const size_t y_offset, const size_t y_inc,
+ float* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ const auto y_inc_int = static_cast<int>(y_inc);
+ // Advance all three buffers to their element offsets.
+ const float* x_start = x_buffer + x_offset;
+ const float* y_start = y_buffer + y_offset;
+ float* ap_start = ap_buffer + ap_offset;
+ const auto result = cublasSspr2(handle, triangle, n_int, &alpha,
+ x_start, x_inc_int, y_start, y_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Double-precision overload; forwards to cublasDspr2.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const double alpha,
+ const double* x_buffer, const size_t x_offset, const size_t x_inc,
+ const double* y_buffer, const size_t y_offset, const size_t y_inc,
+ double* ap_buffer, const size_t ap_offset) {
+ // Row-major requests are rejected before anything is forwarded to cuBLAS.
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ const auto n_int = static_cast<int>(n);
+ const auto x_inc_int = static_cast<int>(x_inc);
+ const auto y_inc_int = static_cast<int>(y_inc);
+ // Advance all three buffers to their element offsets.
+ const double* x_start = x_buffer + x_offset;
+ const double* y_start = y_buffer + y_offset;
+ double* ap_start = ap_buffer + ap_offset;
+ const auto result = cublasDspr2(handle, triangle, n_int, &alpha,
+ x_start, x_inc_int, y_start, y_inc_int, ap_start);
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return result;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXspr2(cublasHandle_t handle, const Layout layout, const cublasFillMode_t triangle,
+ const size_t n,
+ const half alpha,
+ const half* x_buffer, const size_t x_offset, const size_t x_inc,
+ const half* y_buffer, const size_t y_offset, const size_t y_inc,
+ half* ap_buffer, const size_t ap_offset) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// =================================================================================================
+// BLAS level-3 (matrix-matrix) routines
+// =================================================================================================
+
+// Forwards the cuBLAS calls for SGEMM/DGEMM/CGEMM/ZGEMM
+// Single-precision overload; forwards to cublasSgemm.
+// a_offset/b_offset/c_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ float* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasSgemm(handle, a_transpose, b_transpose,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+ &alpha,
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &b_buffer[b_offset], static_cast<int>(b_ld),
+ &beta,
+ &c_buffer[c_offset], static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision overload; forwards to cublasDgemm.
+// a_offset/b_offset/c_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const double* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ double* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasDgemm(handle, a_transpose, b_transpose,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+ &alpha,
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &b_buffer[b_offset], static_cast<int>(b_ld),
+ &beta,
+ &c_buffer[c_offset], static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Single-precision complex overload; forwards to cublasCgemm.
+// The float2 buffers are advanced by their element offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float2 alpha,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float2 beta,
+ float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy both scalars component-wise into the complex type cuBLAS expects.
+ cuComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ cuComplex beta_cuda;
+ beta_cuda.x = beta.real();
+ beta_cuda.y = beta.imag();
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasCgemm(handle, a_transpose, b_transpose,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+ &beta_cuda,
+ reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision complex overload; forwards to cublasZgemm.
+// The double2 buffers are advanced by their element offsets and reinterpreted as cuDoubleComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double2 alpha,
+ const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double2 beta,
+ double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy both scalars component-wise into the complex type cuBLAS expects.
+ cuDoubleComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ cuDoubleComplex beta_cuda;
+ beta_cuda.x = beta.real();
+ beta_cuda.y = beta.imag();
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasZgemm(handle, a_transpose, b_transpose,
+ static_cast<int>(m), static_cast<int>(n), static_cast<int>(k),
+ &alpha_cuda,
+ reinterpret_cast<const cuDoubleComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<const cuDoubleComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+ &beta_cuda,
+ reinterpret_cast<cuDoubleComplex*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Half-precision overload: unconditionally returns CUBLAS_STATUS_NOT_SUPPORTED. None of the
+// arguments are used and no cuBLAS routine is called.
+cublasStatus_t cublasXgemm(cublasHandle_t handle, const Layout layout, const cublasOperation_t a_transpose, const cublasOperation_t b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,
+ const half* a_buffer, const size_t a_offset, const size_t a_ld,
+ const half* b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ half* c_buffer, const size_t c_offset, const size_t c_ld) {
+ return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
+// Single-precision overload; forwards to cublasSsymm.
+// a_offset/b_offset/c_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const float alpha,
+ const float* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ float* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasSsymm(handle, side, triangle,
+ static_cast<int>(m), static_cast<int>(n),
+ &alpha,
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &b_buffer[b_offset], static_cast<int>(b_ld),
+ &beta,
+ &c_buffer[c_offset], static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Double-precision overload; forwards to cublasDsymm.
+// a_offset/b_offset/c_offset are element offsets applied to the buffer pointers before the call.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const double alpha,
+ const double* a_buffer, const size_t a_offset, const size_t a_ld,
+ const double* b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ double* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasDsymm(handle, side, triangle,
+ static_cast<int>(m), static_cast<int>(n),
+ &alpha,
+ &a_buffer[a_offset], static_cast<int>(a_ld),
+ &b_buffer[b_offset], static_cast<int>(b_ld),
+ &beta,
+ &c_buffer[c_offset], static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// Single-precision complex overload; forwards to cublasCsymm.
+// The float2 buffers are advanced by their element offsets and reinterpreted as cuComplex.
+// Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layouts, otherwise the status from cuBLAS.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout, const cublasSideMode_t side, const cublasFillMode_t triangle,
+ const size_t m, const size_t n,
+ const float2 alpha,
+ const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+ const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+ const float2 beta,
+ float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+ if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+ // Copy both scalars component-wise into the complex type cuBLAS expects.
+ cuComplex alpha_cuda;
+ alpha_cuda.x = alpha.real();
+ alpha_cuda.y = alpha.imag();
+ cuComplex beta_cuda;
+ beta_cuda.x = beta.real();
+ beta_cuda.y = beta.imag();
+ // Every size_t argument, including the leading dimensions, is narrowed to int explicitly.
+ auto status = cublasCsymm(handle, side, triangle,
+ static_cast<int>(m), static_cast<int>(n),
+ &alpha_cuda,
+ reinterpret_cast<const cuComplex*>(&a_buffer[a_offset]), static_cast<int>(a_ld),
+ reinterpret_cast<const cuComplex*>(&b_buffer[b_offset]), static_cast<int>(b_ld),
+ &beta_cuda,
+ reinterpret_cast<cuComplex*>(&c_buffer[c_offset]), static_cast<int>(c_ld));
+ // Wait for the device before handing the status back to the caller.
+ cudaDeviceSynchronize();
+ return status;
+}
+// ZSYMM: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuDoubleComplex values, the offset A/B/C
+// pointers are reinterpreted as cuDoubleComplex pointers and cublasZsymm is called.
+// cudaDeviceSynchronize() is invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                           const double2 beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuDoubleComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZsymm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// Half-precision overload of SYMM: no cuBLAS routine is called here. Every argument is ignored
+// and CUBLAS_STATUS_NOT_SUPPORTED is returned unconditionally.
+cublasStatus_t cublasXsymm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const size_t m, const size_t n,
+                           const half alpha,
+                           const half* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const half* b_buffer, const size_t b_offset, const size_t b_ld,
+                           const half beta,
+                           half* c_buffer, const size_t c_offset, const size_t c_ld) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHEMM/ZHEMM
+// CHEMM: single-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuComplex values, the offset A/B/C pointers
+// are reinterpreted as cuComplex pointers and cublasChemm is called. cudaDeviceSynchronize() is
+// invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+                           const float2 beta,
+                           float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasChemm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZHEMM: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuDoubleComplex values, the offset A/B/C
+// pointers are reinterpreted as cuDoubleComplex pointers and cublasZhemm is called.
+// cudaDeviceSynchronize() is invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXhemm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                           const double2 beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuDoubleComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZhemm(
+      handle, side, triangle,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYRK/DSYRK/CSYRK/ZSYRK
+// SSYRK: single-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A and C pointers, cublasSsyrk is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float beta,
+                           float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const float* a_ptr = a_buffer + a_offset;
+  float* c_ptr = c_buffer + c_offset;
+
+  const auto result = cublasSsyrk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// DSYRK: double-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A and C pointers, cublasDsyrk is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double beta,
+                           double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const double* a_ptr = a_buffer + a_offset;
+  double* c_ptr = c_buffer + c_offset;
+
+  const auto result = cublasDsyrk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// CSYRK: single-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuComplex values, the offset A/C pointers are
+// reinterpreted as cuComplex pointers and cublasCsyrk is called. cudaDeviceSynchronize() is
+// invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float2 beta,
+                           float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasCsyrk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZSYRK: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuDoubleComplex values, the offset A/C
+// pointers are reinterpreted as cuDoubleComplex pointers and cublasZsyrk is called.
+// cudaDeviceSynchronize() is invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double2 beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuDoubleComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZsyrk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// Half-precision overload of SYRK: no cuBLAS routine is called here. Every argument is ignored
+// and CUBLAS_STATUS_NOT_SUPPORTED is returned unconditionally.
+cublasStatus_t cublasXsyrk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const half alpha,
+                           const half* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const half beta,
+                           half* c_buffer, const size_t c_offset, const size_t c_ld) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHERK/ZHERK
+// CHERK: single-precision complex matrices with real (float) alpha and beta. Returns
+// CUBLAS_STATUS_NOT_SUPPORTED for row-major layout. Otherwise the offset A/C pointers are
+// reinterpreted as cuComplex pointers and cublasCherk is called with the scalars passed by
+// address. cudaDeviceSynchronize() is invoked before the cuBLAS status is returned.
+cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const float alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const float beta,
+                           float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasCherk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZHERK: double-precision complex matrices with real (double) alpha and beta. Returns
+// CUBLAS_STATUS_NOT_SUPPORTED for row-major layout. Otherwise the offset A/C pointers are
+// reinterpreted as cuDoubleComplex pointers and cublasZherk is called with the scalars passed by
+// address. cudaDeviceSynchronize() is invoked before the cuBLAS status is returned.
+cublasStatus_t cublasXherk(cublasHandle_t handle, const Layout layout,
+                           const cublasFillMode_t triangle, const cublasOperation_t a_transpose,
+                           const size_t n, const size_t k,
+                           const double alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           const double beta,
+                           double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZherk(
+      handle, triangle, a_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for SSYR2K/DSYR2K/CSYR2K/ZSYR2K
+// SSYR2K: single-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A, B and C pointers, cublasSsyr2k is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float alpha,
+                            const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float beta,
+                            float* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const float* a_ptr = a_buffer + a_offset;
+  const float* b_ptr = b_buffer + b_offset;
+  float* c_ptr = c_buffer + c_offset;
+
+  const auto result = cublasSsyr2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// DSYR2K: double-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A, B and C pointers, cublasDsyr2k is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double alpha,
+                            const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double beta,
+                            double* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const double* a_ptr = a_buffer + a_offset;
+  const double* b_ptr = b_buffer + b_offset;
+  double* c_ptr = c_buffer + c_offset;
+
+  const auto result = cublasDsyr2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// CSYR2K: single-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuComplex values, the offset A/B/C pointers
+// are reinterpreted as cuComplex pointers and cublasCsyr2k is called. cudaDeviceSynchronize() is
+// invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float2 alpha,
+                            const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float2 beta,
+                            float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasCsyr2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZSYR2K: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha and beta are copied into cuDoubleComplex values, the offset A/B/C
+// pointers are reinterpreted as cuDoubleComplex pointers and cublasZsyr2k is called.
+// cudaDeviceSynchronize() is invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double2 alpha,
+                            const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double2 beta,
+                            double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalars: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+  cuDoubleComplex cu_beta;
+  cu_beta.x = beta.real();
+  cu_beta.y = beta.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZsyr2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &cu_beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// Half-precision overload of SYR2K: no cuBLAS routine is called here. Every argument is ignored
+// and CUBLAS_STATUS_NOT_SUPPORTED is returned unconditionally.
+cublasStatus_t cublasXsyr2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const half alpha,
+                            const half* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const half* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const half beta,
+                            half* c_buffer, const size_t c_offset, const size_t c_ld) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for CHER2K/ZHER2K
+// CHER2K: single-precision complex matrices, complex alpha and real (float) beta. Returns
+// CUBLAS_STATUS_NOT_SUPPORTED for row-major layout. Otherwise alpha is copied into a cuComplex
+// value, the offset A/B/C pointers are reinterpreted as cuComplex pointers and cublasCher2k is
+// called. cudaDeviceSynchronize() is invoked before the cuBLAS status is returned.
+cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const float2 alpha,
+                            const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const float2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const float beta,
+                            float2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Only alpha needs converting; beta is already a plain float and is passed by address
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasCher2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZHER2K: double-precision complex matrices, complex alpha and real (double) beta. Returns
+// CUBLAS_STATUS_NOT_SUPPORTED for row-major layout. Otherwise alpha is copied into a
+// cuDoubleComplex value, the offset A/B/C pointers are reinterpreted as cuDoubleComplex pointers
+// and cublasZher2k is called. cudaDeviceSynchronize() is invoked before the status is returned.
+cublasStatus_t cublasXher2k(cublasHandle_t handle, const Layout layout,
+                            const cublasFillMode_t triangle, const cublasOperation_t ab_transpose,
+                            const size_t n, const size_t k,
+                            const double2 alpha,
+                            const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                            const double2* b_buffer, const size_t b_offset, const size_t b_ld,
+                            const double beta,
+                            double2* c_buffer, const size_t c_offset, const size_t c_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Only alpha needs converting; beta is already a plain double and is passed by address
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<const cuDoubleComplex*>(b_buffer + b_offset);
+  const auto c_ptr = reinterpret_cast<cuDoubleComplex*>(c_buffer + c_offset);
+
+  const auto result = cublasZher2k(
+      handle, triangle, ab_transpose,
+      static_cast<int>(n), static_cast<int>(k),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),
+      &beta,
+      c_ptr, static_cast<int>(c_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+
+// Forwards the cuBLAS calls for STRMM/DTRMM/CTRMM/ZTRMM
+// STRMM: single-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise cublasStrmm is called, cudaDeviceSynchronize() is invoked and the status reported by
+// cuBLAS is returned.
+//
+// The cuBLAS v2 TRMM routine is out-of-place: after A it expects an input matrix B and a separate
+// output matrix C. This wrapper exposes the in-place BLAS form (the result replaces B), which
+// cuBLAS supports by passing B for both the input and the output operand. A must therefore only
+// appear once in the argument list.
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const float* a_ptr = a_buffer + a_offset;
+  float* b_ptr = b_buffer + b_offset;
+
+  const auto result = cublasStrmm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),   // input operand B
+      b_ptr, static_cast<int>(b_ld));  // output operand C, aliased onto B
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// DTRMM: double-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise cublasDtrmm is called, cudaDeviceSynchronize() is invoked and the status reported by
+// cuBLAS is returned.
+//
+// The cuBLAS v2 TRMM routine is out-of-place: after A it expects an input matrix B and a separate
+// output matrix C. This wrapper exposes the in-place BLAS form (the result replaces B), which
+// cuBLAS supports by passing B for both the input and the output operand. A must therefore only
+// appear once in the argument list.
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const double* a_ptr = a_buffer + a_offset;
+  double* b_ptr = b_buffer + b_offset;
+
+  const auto result = cublasDtrmm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),   // input operand B
+      b_ptr, static_cast<int>(b_ld));  // output operand C, aliased onto B
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// CTRMM: single-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha is copied into a cuComplex value, the offset pointers are reinterpreted
+// as cuComplex pointers and cublasCtrmm is called. cudaDeviceSynchronize() is invoked before the
+// status reported by cuBLAS is returned.
+//
+// The cuBLAS v2 TRMM routine is out-of-place: after A it expects an input matrix B and a separate
+// output matrix C. This wrapper exposes the in-place BLAS form (the result replaces B), which
+// cuBLAS supports by passing B for both the input and the output operand. A must therefore only
+// appear once in the argument list.
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalar: copy the real and imaginary parts into the cuBLAS complex representation
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<cuComplex*>(b_buffer + b_offset);
+
+  const auto result = cublasCtrmm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),   // input operand B
+      b_ptr, static_cast<int>(b_ld));  // output operand C, aliased onto B
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZTRMM: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha is copied into a cuDoubleComplex value, the offset pointers are
+// reinterpreted as cuDoubleComplex pointers and cublasZtrmm is called. cudaDeviceSynchronize() is
+// invoked before the status reported by cuBLAS is returned.
+//
+// The cuBLAS v2 TRMM routine is out-of-place: after A it expects an input matrix B and a separate
+// output matrix C. This wrapper exposes the in-place BLAS form (the result replaces B), which
+// cuBLAS supports by passing B for both the input and the output operand. A must therefore only
+// appear once in the argument list.
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalar: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<cuDoubleComplex*>(b_buffer + b_offset);
+
+  const auto result = cublasZtrmm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld),   // input operand B
+      b_ptr, static_cast<int>(b_ld));  // output operand C, aliased onto B
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// Half-precision overload of TRMM: no cuBLAS routine is called here. Every argument is ignored
+// and CUBLAS_STATUS_NOT_SUPPORTED is returned unconditionally.
+cublasStatus_t cublasXtrmm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const half alpha,
+                           const half* a_buffer, const size_t a_offset, const size_t a_ld,
+                           half* b_buffer, const size_t b_offset, const size_t b_ld) {
+  return CUBLAS_STATUS_NOT_SUPPORTED;
+}
+
+// Forwards the cuBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
+// STRSM: single-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A and B pointers, cublasStrsm is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float alpha,
+                           const float* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const float* a_ptr = a_buffer + a_offset;
+  float* b_ptr = b_buffer + b_offset;
+
+  const auto result = cublasStrsm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// DTRSM: double-precision overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major layout.
+// Otherwise the element offsets are applied to the A and B pointers, cublasDtrsm is called,
+// cudaDeviceSynchronize() is invoked and the status reported by cuBLAS is returned.
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double alpha,
+                           const double* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Offsets are counted in elements, so pointer arithmetic yields the start of each matrix
+  const double* a_ptr = a_buffer + a_offset;
+  double* b_ptr = b_buffer + b_offset;
+
+  const auto result = cublasDtrsm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// CTRSM: single-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha is copied into a cuComplex value, the offset A/B pointers are
+// reinterpreted as cuComplex pointers and cublasCtrsm is called. cudaDeviceSynchronize() is
+// invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const float2 alpha,
+                           const float2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           float2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalar: copy the real and imaginary parts into the cuBLAS complex representation
+  cuComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuComplex
+  const auto a_ptr = reinterpret_cast<const cuComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<cuComplex*>(b_buffer + b_offset);
+
+  const auto result = cublasCtrsm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+// ZTRSM: double-precision complex overload. Returns CUBLAS_STATUS_NOT_SUPPORTED for row-major
+// layout. Otherwise alpha is copied into a cuDoubleComplex value, the offset A/B pointers are
+// reinterpreted as cuDoubleComplex pointers and cublasZtrsm is called. cudaDeviceSynchronize()
+// is invoked before the status reported by cuBLAS is returned.
+cublasStatus_t cublasXtrsm(cublasHandle_t handle, const Layout layout,
+                           const cublasSideMode_t side, const cublasFillMode_t triangle,
+                           const cublasOperation_t a_transpose, const cublasDiagType_t diagonal,
+                           const size_t m, const size_t n,
+                           const double2 alpha,
+                           const double2* a_buffer, const size_t a_offset, const size_t a_ld,
+                           double2* b_buffer, const size_t b_offset, const size_t b_ld) {
+  if (layout == Layout::kRowMajor) { return CUBLAS_STATUS_NOT_SUPPORTED; }
+
+  // Scalar: copy the real and imaginary parts into the cuBLAS complex representation
+  cuDoubleComplex cu_alpha;
+  cu_alpha.x = alpha.real();
+  cu_alpha.y = alpha.imag();
+
+  // Matrices: apply the element offsets, then view the data as cuDoubleComplex
+  const auto a_ptr = reinterpret_cast<const cuDoubleComplex*>(a_buffer + a_offset);
+  const auto b_ptr = reinterpret_cast<cuDoubleComplex*>(b_buffer + b_offset);
+
+  const auto result = cublasZtrsm(
+      handle, side, triangle, a_transpose, diagonal,
+      static_cast<int>(m), static_cast<int>(n),
+      &cu_alpha,
+      a_ptr, static_cast<int>(a_ld),
+      b_ptr, static_cast<int>(b_ld));
+  cudaDeviceSynchronize();  // wait for the device; the returned error code is not inspected
+  return result;
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_WRAPPER_CUBLAS_H_
+#endif
diff --git a/test/wrapper_cuda.hpp b/test/wrapper_cuda.hpp
new file mode 100644
index 00000000..c97ae3ef
--- /dev/null
+++ b/test/wrapper_cuda.hpp
@@ -0,0 +1,149 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file contains all the CUDA related code; used only in case of testing against cuBLAS
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_WRAPPER_CUDA_H_
+#define CLBLAST_TEST_WRAPPER_CUDA_H_
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <stdexcept>
+
+#include "utilities/utilities.hpp"
+
+#ifdef CLBLAST_REF_CUBLAS
+ #include <cuda_runtime.h>
+ #include <cublas_v2.h>
+#endif
+
+namespace clblast {
+// =================================================================================================
+
+#ifdef CLBLAST_REF_CUBLAS
+  // Selects the CUDA device numbered 'args.device_id' and creates a cuBLAS handle, which is
+  // written into 'args.cublas_handle'. The result of cudaSetDevice is not checked; a failing
+  // cublasCreate raises std::runtime_error. Only compiled when CLBLAST_REF_CUBLAS is defined.
+  template <typename T>
+  void cublasSetup(Arguments<T> &args) {
+    const auto device = static_cast<int>(args.device_id);
+    cudaSetDevice(device);
+
+    // The handle field is reinterpreted as a cublasHandle_t slot for cublasCreate to fill in
+    const auto handle_slot = reinterpret_cast<cublasHandle_t*>(&args.cublas_handle);
+    if (cublasCreate(handle_slot) != CUBLAS_STATUS_SUCCESS) {
+      throw std::runtime_error("CUDA cublasCreate error");
+    }
+  }
+#endif
+
+#ifdef CLBLAST_REF_CUBLAS
+  // Destroys the cuBLAS handle stored in 'args.cublas_handle'. Raises std::runtime_error when
+  // cublasDestroy does not report success. Only compiled when CLBLAST_REF_CUBLAS is defined.
+  template <typename T>
+  void cublasTeardown(Arguments<T> &args) {
+    const auto handle = reinterpret_cast<cublasHandle_t>(args.cublas_handle);
+    if (cublasDestroy(handle) != CUBLAS_STATUS_SUCCESS) {
+      throw std::runtime_error("CUDA cublasDestroy error");
+    }
+  }
+#endif
+
+// =================================================================================================
+
+// Copies data from the CUDA device to the host and frees-up the CUDA memory afterwards
+#ifdef CLBLAST_REF_CUBLAS
+  // Transfers 'size' elements of type T from the device buffer '*buffer_cuda' into 'buffer_host',
+  // then releases the device allocation with cudaFree and resets '*buffer_cuda' to nullptr.
+  //
+  // Throws std::runtime_error when:
+  //  - 'buffer_host' holds fewer than 'size' elements (checked up front, since cudaMemcpy would
+  //    otherwise write past the end of the vector's storage);
+  //  - cudaMemcpy or cudaFree does not return cudaSuccess.
+  // When an exception is thrown before cudaFree is reached, the device buffer is left untouched.
+  template <typename T>
+  void CUDAToHost(T** buffer_cuda, std::vector<T> &buffer_host, const size_t size) {
+    if (buffer_host.size() < size) {
+      throw std::runtime_error("CUDA cudaMemcpy error: host buffer is smaller than the requested size");
+    }
+    const auto copy_status = cudaMemcpy(
+      reinterpret_cast<void*>(buffer_host.data()),
+      reinterpret_cast<void*>(*buffer_cuda),
+      size*sizeof(T),
+      cudaMemcpyDeviceToHost
+    );
+    if (copy_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast<int>(copy_status)));
+    }
+    const auto free_status = cudaFree(*buffer_cuda);
+    if (free_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaFree error with status: "+ToString(static_cast<int>(free_status)));
+    }
+    *buffer_cuda = nullptr;  // marks the buffer as released so that it can be allocated again
+  }
+#else
+  // Without cuBLAS support this is a no-op with the same call shape
+  template <typename T> void CUDAToHost(T**, const std::vector<T>&, const size_t) { }
+#endif
+
+// Allocates space on the CUDA device and copies in data from the host
+#ifdef CLBLAST_REF_CUBLAS
+  // Uploads the first 'size' elements of 'buffer_host' into the device buffer '*buffer_cuda'.
+  // When '*buffer_cuda' is nullptr, 'size' elements are allocated with cudaMalloc first; a
+  // non-null pointer is reused as-is, without knowledge of how large that allocation is.
+  //
+  // The host vector is only read, so it is taken by const reference (matching the no-op variant
+  // used when CLBLAST_REF_CUBLAS is not defined).
+  //
+  // Throws std::runtime_error when:
+  //  - 'buffer_host' holds fewer than 'size' elements (checked up front, since cudaMemcpy would
+  //    otherwise read past the end of the vector's storage);
+  //  - cudaMalloc or cudaMemcpy does not return cudaSuccess.
+  template <typename T>
+  void HostToCUDA(T** buffer_cuda, const std::vector<T> &buffer_host, const size_t size) {
+    if (buffer_host.size() < size) {
+      throw std::runtime_error("CUDA cudaMemcpy error: host buffer is smaller than the requested size");
+    }
+    if (*buffer_cuda == nullptr) {
+      const auto alloc_status = cudaMalloc(reinterpret_cast<void**>(buffer_cuda), size*sizeof(T));
+      if (alloc_status != cudaSuccess) {
+        throw std::runtime_error("CUDA cudaMalloc error with status: "+ToString(static_cast<int>(alloc_status)));
+      }
+    }
+    const auto copy_status = cudaMemcpy(
+      static_cast<void*>(*buffer_cuda),
+      static_cast<const void*>(buffer_host.data()),
+      size*sizeof(T),
+      cudaMemcpyHostToDevice
+    );
+    if (copy_status != cudaSuccess) {
+      throw std::runtime_error("CUDA cudaMemcpy error with status: "+ToString(static_cast<int>(copy_status)));
+    }
+  }
+#else
+  // Without cuBLAS support this is a no-op with the same call shape
+  template <typename T> void HostToCUDA(T**, const std::vector<T>&, const size_t) { }
+#endif
+
+// =================================================================================================
+
+// Collection of raw pointers of element type T, one per buffer kind (X and Y vectors, A/B/C and
+// AP matrices, and a scalar buffer). Every pointer starts out as nullptr.
+template <typename T>
+struct BuffersCUDA {
+  // Vectors
+  T* x_vec = nullptr;
+  T* y_vec = nullptr;
+
+  // Matrices
+  T* a_mat = nullptr;
+  T* b_mat = nullptr;
+  T* c_mat = nullptr;
+  T* ap_mat = nullptr;
+
+  // Scalar
+  T* scalar = nullptr;
+};
+
+// Downloads every buffer listed in 'names' from 'buffers' into the matching member of
+// 'buffers_host'. For each name the host vector is first replaced by a zero-filled vector of the
+// size given in 'args', after which the single-buffer CUDAToHost overload is called for it.
+// Throws std::runtime_error("Invalid buffer name") for a name that matches none of the kBuf*
+// constants.
+template <typename T, typename U>
+void CUDAToHost(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host,
+                const std::vector<std::string> &names) {
+
+  // Shared per-buffer step: re-create the host vector, then transfer into it
+  const auto download = [](T** device, std::vector<T> &host, const size_t count) {
+    host = std::vector<T>(count, static_cast<T>(0));
+    CUDAToHost(device, host, count);
+  };
+
+  for (auto &buffer_name: names) {
+    if (buffer_name == kBufVecX) {
+      download(&buffers.x_vec, buffers_host.x_vec, args.x_size);
+    }
+    else if (buffer_name == kBufVecY) {
+      download(&buffers.y_vec, buffers_host.y_vec, args.y_size);
+    }
+    else if (buffer_name == kBufMatA) {
+      download(&buffers.a_mat, buffers_host.a_mat, args.a_size);
+    }
+    else if (buffer_name == kBufMatB) {
+      download(&buffers.b_mat, buffers_host.b_mat, args.b_size);
+    }
+    else if (buffer_name == kBufMatC) {
+      download(&buffers.c_mat, buffers_host.c_mat, args.c_size);
+    }
+    else if (buffer_name == kBufMatAP) {
+      download(&buffers.ap_mat, buffers_host.ap_mat, args.ap_size);
+    }
+    else if (buffer_name == kBufScalar) {
+      download(&buffers.scalar, buffers_host.scalar, args.scalar_size);
+    }
+    else {
+      throw std::runtime_error("Invalid buffer name");
+    }
+  }
+}
+
+// Uploads every buffer listed in 'names' from 'buffers_host' into the matching member of
+// 'buffers', using the sizes given in 'args' and the single-buffer HostToCUDA overload.
+// Throws std::runtime_error("Invalid buffer name") for a name that matches none of the kBuf*
+// constants.
+template <typename T, typename U>
+void HostToCUDA(const Arguments<U> &args, BuffersCUDA<T> &buffers, BuffersHost<T> &buffers_host,
+                const std::vector<std::string> &names) {
+  for (auto &buffer_name: names) {
+    if (buffer_name == kBufVecX) {
+      HostToCUDA(&buffers.x_vec, buffers_host.x_vec, args.x_size);
+    }
+    else if (buffer_name == kBufVecY) {
+      HostToCUDA(&buffers.y_vec, buffers_host.y_vec, args.y_size);
+    }
+    else if (buffer_name == kBufMatA) {
+      HostToCUDA(&buffers.a_mat, buffers_host.a_mat, args.a_size);
+    }
+    else if (buffer_name == kBufMatB) {
+      HostToCUDA(&buffers.b_mat, buffers_host.b_mat, args.b_size);
+    }
+    else if (buffer_name == kBufMatC) {
+      HostToCUDA(&buffers.c_mat, buffers_host.c_mat, args.c_size);
+    }
+    else if (buffer_name == kBufMatAP) {
+      HostToCUDA(&buffers.ap_mat, buffers_host.ap_mat, args.ap_size);
+    }
+    else if (buffer_name == kBufScalar) {
+      HostToCUDA(&buffers.scalar, buffers_host.scalar, args.scalar_size);
+    }
+    else {
+      throw std::runtime_error("Invalid buffer name");
+    }
+  }
+}
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_WRAPPER_CUDA_H_
+#endif