summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-05-25 14:37:26 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-05-25 14:37:26 +0200
commit: 4612ff3552d94ab8827888c3de2fcac76190a686 (patch)
tree: 2ef7452fa0d6b57966d852f43f9fbb48309b82c2 /test
parent: 9f8745507020961b1c287febc3a5634b46ccb0e9 (diff)
Added possibility to run the performance client with half-precision
Diffstat (limited to 'test')
-rw-r--r-- test/performance/client.cc 12
-rw-r--r-- test/performance/routines/level1/xamax.cc 3
-rw-r--r-- test/performance/routines/level1/xasum.cc 3
-rw-r--r-- test/performance/routines/level1/xaxpy.cc 3
-rw-r--r-- test/performance/routines/level1/xcopy.cc 3
-rw-r--r-- test/performance/routines/level1/xdot.cc 3
-rw-r--r-- test/performance/routines/level1/xnrm2.cc 3
-rw-r--r-- test/performance/routines/level1/xscal.cc 3
-rw-r--r-- test/performance/routines/level1/xswap.cc 3
-rw-r--r-- test/performance/routines/level2/xgbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xgemv.cc 3
-rw-r--r-- test/performance/routines/level2/xger.cc 3
-rw-r--r-- test/performance/routines/level2/xsbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xspmv.cc 3
-rw-r--r-- test/performance/routines/level2/xspr.cc 3
-rw-r--r-- test/performance/routines/level2/xspr2.cc 3
-rw-r--r-- test/performance/routines/level2/xsymv.cc 3
-rw-r--r-- test/performance/routines/level2/xsyr.cc 3
-rw-r--r-- test/performance/routines/level2/xsyr2.cc 3
-rw-r--r-- test/performance/routines/level2/xtbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xtpmv.cc 3
-rw-r--r-- test/performance/routines/level2/xtrmv.cc 3
-rw-r--r-- test/performance/routines/level3/xgemm.cc 3
-rw-r--r-- test/performance/routines/level3/xsymm.cc 3
-rw-r--r-- test/performance/routines/level3/xsyr2k.cc 3
-rw-r--r-- test/performance/routines/level3/xsyrk.cc 3
-rw-r--r-- test/performance/routines/level3/xtrmm.cc 3
-rw-r--r-- test/performance/routines/level3/xtrsm.cc 3
-rw-r--r-- test/wrapper_cblas.h 192
-rw-r--r-- test/wrapper_clblas.h 255
30 files changed, 513 insertions, 27 deletions
diff --git a/test/performance/client.cc b/test/performance/client.cc
index 9aaf1e4e..5a7226df 100644
--- a/test/performance/client.cc
+++ b/test/performance/client.cc
@@ -116,6 +116,17 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const GetMetric
// which is thus always displayed (unless silence is specified).
if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); }
+ // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
+ if (args.precision == Precision::kHalf) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (!args.silent) {
+ fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
+ }
+ }
+ args.compare_clblas = 0;
+ args.compare_cblas = 0;
+ }
+
// Returns the arguments
return args;
}
@@ -339,6 +350,7 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
// =================================================================================================
// Compiles the templated class
+template class Client<half,half>;
template class Client<float,float>;
template class Client<double,double>;
template class Client<float2,float2>;
diff --git a/test/performance/routines/level1/xamax.cc b/test/performance/routines/level1/xamax.cc
index 85caa483..4af1f1c0 100644
--- a/test/performance/routines/level1/xamax.cc
+++ b/test/performance/routines/level1/xamax.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXamax<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xasum.cc b/test/performance/routines/level1/xasum.cc
index 2680966e..8e098890 100644
--- a/test/performance/routines/level1/xasum.cc
+++ b/test/performance/routines/level1/xasum.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXasum<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXasum<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xaxpy.cc b/test/performance/routines/level1/xaxpy.cc
index b423bc3a..b48c290d 100644
--- a/test/performance/routines/level1/xaxpy.cc
+++ b/test/performance/routines/level1/xaxpy.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXaxpy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXaxpy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xcopy.cc b/test/performance/routines/level1/xcopy.cc
index c04c6c1c..b7c60f0f 100644
--- a/test/performance/routines/level1/xcopy.cc
+++ b/test/performance/routines/level1/xcopy.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXcopy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXcopy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xdot.cc b/test/performance/routines/level1/xdot.cc
index f4616464..3edf2590 100644
--- a/test/performance/routines/level1/xdot.cc
+++ b/test/performance/routines/level1/xdot.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXdot<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXdot<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xnrm2.cc b/test/performance/routines/level1/xnrm2.cc
index db6ec9ad..f167df95 100644
--- a/test/performance/routines/level1/xnrm2.cc
+++ b/test/performance/routines/level1/xnrm2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXnrm2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xscal.cc b/test/performance/routines/level1/xscal.cc
index bd38f43e..35e21ba8 100644
--- a/test/performance/routines/level1/xscal.cc
+++ b/test/performance/routines/level1/xscal.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXscal<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXscal<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xswap.cc b/test/performance/routines/level1/xswap.cc
index 112641d3..4791d4c3 100644
--- a/test/performance/routines/level1/xswap.cc
+++ b/test/performance/routines/level1/xswap.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXswap<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXswap<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xgbmv.cc b/test/performance/routines/level2/xgbmv.cc
index b050184d..be4056de 100644
--- a/test/performance/routines/level2/xgbmv.cc
+++ b/test/performance/routines/level2/xgbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xgemv.cc b/test/performance/routines/level2/xgemv.cc
index 51ab9a10..50e6225a 100644
--- a/test/performance/routines/level2/xgemv.cc
+++ b/test/performance/routines/level2/xgemv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgemv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xger.cc b/test/performance/routines/level2/xger.cc
index 2d956346..b1b5a268 100644
--- a/test/performance/routines/level2/xger.cc
+++ b/test/performance/routines/level2/xger.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXger<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXger<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsbmv.cc b/test/performance/routines/level2/xsbmv.cc
index eabab3b7..5fb6e8c0 100644
--- a/test/performance/routines/level2/xsbmv.cc
+++ b/test/performance/routines/level2/xsbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspmv.cc b/test/performance/routines/level2/xspmv.cc
index 2a9ef925..e0ee2075 100644
--- a/test/performance/routines/level2/xspmv.cc
+++ b/test/performance/routines/level2/xspmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspr.cc b/test/performance/routines/level2/xspr.cc
index 84331d74..19651679 100644
--- a/test/performance/routines/level2/xspr.cc
+++ b/test/performance/routines/level2/xspr.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspr2.cc b/test/performance/routines/level2/xspr2.cc
index c42009a1..8745c004 100644
--- a/test/performance/routines/level2/xspr2.cc
+++ b/test/performance/routines/level2/xspr2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsymv.cc b/test/performance/routines/level2/xsymv.cc
index 3f72fe77..42de1ed5 100644
--- a/test/performance/routines/level2/xsymv.cc
+++ b/test/performance/routines/level2/xsymv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsymv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsyr.cc b/test/performance/routines/level2/xsyr.cc
index 6b31d3a9..310bfb5e 100644
--- a/test/performance/routines/level2/xsyr.cc
+++ b/test/performance/routines/level2/xsyr.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsyr2.cc b/test/performance/routines/level2/xsyr2.cc
index 0ad59d2d..bbeed3db 100644
--- a/test/performance/routines/level2/xsyr2.cc
+++ b/test/performance/routines/level2/xsyr2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtbmv.cc b/test/performance/routines/level2/xtbmv.cc
index a3297f34..24eec61f 100644
--- a/test/performance/routines/level2/xtbmv.cc
+++ b/test/performance/routines/level2/xtbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtpmv.cc b/test/performance/routines/level2/xtpmv.cc
index 72477f2d..2f2487f8 100644
--- a/test/performance/routines/level2/xtpmv.cc
+++ b/test/performance/routines/level2/xtpmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtpmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtpmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtrmv.cc b/test/performance/routines/level2/xtrmv.cc
index 894a7952..3f23afd1 100644
--- a/test/performance/routines/level2/xtrmv.cc
+++ b/test/performance/routines/level2/xtrmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xgemm.cc b/test/performance/routines/level3/xgemm.cc
index 91897ee1..8e48dc3a 100644
--- a/test/performance/routines/level3/xgemm.cc
+++ b/test/performance/routines/level3/xgemm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgemm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsymm.cc b/test/performance/routines/level3/xsymm.cc
index e0feadd1..7eac5537 100644
--- a/test/performance/routines/level3/xsymm.cc
+++ b/test/performance/routines/level3/xsymm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsymm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsyr2k.cc b/test/performance/routines/level3/xsyr2k.cc
index 4a82ddc4..49d00f34 100644
--- a/test/performance/routines/level3/xsyr2k.cc
+++ b/test/performance/routines/level3/xsyr2k.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr2k<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2k<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsyrk.cc b/test/performance/routines/level3/xsyrk.cc
index 70f61322..ad0a06b4 100644
--- a/test/performance/routines/level3/xsyrk.cc
+++ b/test/performance/routines/level3/xsyrk.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyrk<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyrk<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xtrmm.cc b/test/performance/routines/level3/xtrmm.cc
index 6f6041e4..92526844 100644
--- a/test/performance/routines/level3/xtrmm.cc
+++ b/test/performance/routines/level3/xtrmm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrmm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xtrsm.cc b/test/performance/routines/level3/xtrsm.cc
index 76ef255a..08e4b4a9 100644
--- a/test/performance/routines/level3/xtrsm.cc
+++ b/test/performance/routines/level3/xtrsm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrsm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrsm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h
index 3182fdfc..2fcab4d0 100644
--- a/test/wrapper_cblas.h
+++ b/test/wrapper_cblas.h
@@ -161,6 +161,11 @@ void cblasXswap(const size_t n,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXswap(const size_t n,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and x/y are not swapped (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
void cblasXscal(const size_t n,
@@ -193,6 +198,11 @@ void cblasXscal(const size_t n,
alpha_array.data(),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXscal(const size_t n,
+ const half alpha,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and x is not scaled (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
void cblasXcopy(const size_t n,
@@ -223,6 +233,11 @@ void cblasXcopy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXcopy(const size_t n,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not written (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
void cblasXaxpy(const size_t n,
@@ -263,6 +278,12 @@ void cblasXaxpy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXaxpy(const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SDOT/DDOT
void cblasXdot(const size_t n,
@@ -281,6 +302,12 @@ void cblasXdot(const size_t n,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXdot(const size_t n,
+ std::vector<half>& dot_buffer, const size_t dot_offset,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and dot_buffer is not written (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for CDOTU/ZDOTU
void cblasXdotu(const size_t n,
@@ -347,6 +374,11 @@ void cblasXnrm2(const size_t n,
nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
+void cblasXnrm2(const size_t n,
+ std::vector<half>& nrm2_buffer, const size_t nrm2_offset,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and nrm2_buffer is not written (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM
void cblasXasum(const size_t n,
@@ -373,6 +405,11 @@ void cblasXasum(const size_t n,
asum_buffer[asum_offset].real(cblas_dzasum(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
+void cblasXasum(const size_t n,
+ std::vector<half>& asum_buffer, const size_t asum_offset,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and asum_buffer is not written (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
void cblasXamax(const size_t n,
@@ -399,6 +436,11 @@ void cblasXamax(const size_t n,
((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXamax(const size_t n,
+ std::vector<half>& imax_buffer, const size_t imax_offset,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and imax_buffer is not written (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@@ -469,6 +511,15 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
@@ -535,6 +586,15 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for CHEMV/ZHEMV
void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -675,6 +735,15 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SSBMV/DSBMV
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -707,6 +776,15 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for SSPMV/DSPMV
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -739,6 +817,15 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& ap_buffer, const size_t ap_offset,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ return; // half overload is a no-op stub: no BLAS call is made and y is not updated (NOTE(review): presumably exists only so half-precision instantiations compile — confirm)
+}
// Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -777,6 +864,12 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t n,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half-precision placeholder: no CBLAS routine is called, so x_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -815,6 +908,12 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t n, const size_t k,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half-precision placeholder: no CBLAS routine is called, so x_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -853,6 +952,12 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t n,
+ const std::vector<half>& ap_buffer, const size_t ap_offset,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ return; // half-precision placeholder: no CBLAS routine is called, so x_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -995,6 +1100,14 @@ void cblasXger(const CBLAS_ORDER layout,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXger(const CBLAS_ORDER layout, // half-precision overload (placeholder)
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ return; // no-op: no CBLAS routine is called, so a_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for CGERU/ZGERU
void cblasXgeru(const CBLAS_ORDER layout,
@@ -1187,6 +1300,13 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ return; // no-op: no CBLAS routine is called, so a_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for SSPR/DSPR
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1211,6 +1331,13 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
}
+void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& ap_buffer, const size_t ap_offset) {
+ return; // no-op: no CBLAS routine is called, so ap_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for SSYR2/DSYR2
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1239,6 +1366,14 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ return; // no-op: no CBLAS routine is called, so a_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for SSPR2/DSPR2
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1267,6 +1402,14 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&ap_buffer[ap_offset]);
}
+void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& ap_buffer, const size_t ap_offset) {
+ return; // no-op: no CBLAS routine is called, so ap_buffer is left unmodified
+}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@@ -1337,6 +1480,15 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so c_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@@ -1403,6 +1555,15 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so c_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for CHEMM/ZHEMM
void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@@ -1497,6 +1658,14 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so c_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for CHERK/ZHERK
void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
@@ -1591,6 +1760,15 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so c_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for CHER2K/ZHER2K
void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
@@ -1673,6 +1851,13 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so b_buffer is left unmodified
+}
// Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -1721,6 +1906,13 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
+ return; // half-precision placeholder: no CBLAS routine is called, so b_buffer is left unmodified
+}
// =================================================================================================
} // namespace clblast
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index b9410cae..6e44d780 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -223,6 +223,14 @@ clblasStatus clblasXswap<double2>(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXswap<half>(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
clblasStatus clblasXscal(const size_t n,
@@ -265,6 +273,13 @@ clblasStatus clblasXscal(const size_t n,
x_buffer, x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXscal(const size_t n, // half-precision overload (placeholder)
+ const half alpha,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T>
@@ -317,6 +332,14 @@ clblasStatus clblasXcopy<double2>(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXcopy<half>(const size_t n,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
clblasStatus clblasXaxpy(const size_t n,
@@ -367,6 +390,14 @@ clblasStatus clblasXaxpy(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXaxpy(const size_t n, // half-precision overload (placeholder)
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SDOT/DDOT
template <typename T>
@@ -410,6 +441,15 @@ clblasStatus clblasXdot<double>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXdot<half>(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CDOTU/ZDOTU
template <typename T>
@@ -564,6 +604,14 @@ clblasStatus clblasXnrm2<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXnrm2<half>(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
template <typename T>
@@ -632,6 +680,14 @@ clblasStatus clblasXasum<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXasum<half>(const size_t n,
+ cl_mem asum_buffer, const size_t asum_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
template <typename T>
@@ -700,6 +756,14 @@ clblasStatus clblasXamax<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXamax<half>(const size_t n,
+ cl_mem imax_buffer, const size_t imax_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@@ -778,6 +842,17 @@ clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_trans
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, // half-precision overload (placeholder)
+ const size_t m, const size_t n,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
@@ -852,6 +927,17 @@ clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_trans
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, // half-precision overload (placeholder)
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CHEMV/ZHEMV
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
@@ -1004,6 +1090,17 @@ clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSBMV/DSBMV
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
@@ -1042,6 +1139,17 @@ clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n, const size_t k,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSPMV/DSPMV
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
@@ -1080,6 +1188,17 @@ clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem ap_buffer, const size_t ap_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
template <typename T>
@@ -1157,6 +1276,15 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
template <typename T>
@@ -1234,6 +1362,15 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n, const size_t k,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
template <typename T>
@@ -1311,6 +1448,15 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <> // explicit specialization for T = half (placeholder)
+clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n,
+ const cl_mem ap_buffer, const size_t ap_offset,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
template <typename T>
@@ -1528,6 +1674,16 @@ clblasStatus clblasXger(const clblasOrder layout,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXger(const clblasOrder layout, // half-precision overload (placeholder)
+ const size_t m, const size_t n,
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CGERU/ZGERU
clblasStatus clblasXgeru(const clblasOrder layout,
@@ -1754,6 +1910,15 @@ clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSPR/DSPR
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
@@ -1784,6 +1949,15 @@ clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
ap_buffer, ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem ap_buffer, const size_t ap_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSYR2/DSYR2
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
@@ -1818,6 +1992,16 @@ clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSPR2/DSPR2
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
@@ -1852,6 +2036,16 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
ap_buffer, ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, // half-precision overload (placeholder)
+ const size_t n,
+ const half alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem ap_buffer, const size_t ap_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // unconditional: no clBLAS call is made, nothing is enqueued, events is not written
+}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@@ -1930,6 +2124,17 @@ clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_trans
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
@@ -2004,6 +2209,17 @@ clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
+ const size_t m, const size_t n,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CHEMM/ZHEMM
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
@@ -2108,6 +2324,16 @@ clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, co
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CHERK/ZHERK
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
@@ -2216,6 +2442,17 @@ clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, c
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for CHER2K/ZHER2K
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
@@ -2312,6 +2549,15 @@ clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const
b_buffer, b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
@@ -2370,6 +2616,15 @@ clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const
b_buffer, b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ return clblasNotImplemented; // half-precision placeholder: no clBLAS call is made, nothing is enqueued, events is not written
+}
// =================================================================================================
} // namespace clblast