summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-05-30 11:11:28 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-05-30 11:11:28 +0200
commit 61105e38100d323ea270f2cbee0a824d401eaa77 (patch)
tree a6f8af9f6e75b57870bfce119f037093a46d2e9c /test
parent 182d2cffa163688e2ae08d5d526f8eb63914b6ac (diff)
parent 03182f9d07533f795a498936391da744d982e8e2 (diff)
Merge branch 'half_precision' into development
Diffstat (limited to 'test')
-rw-r--r-- test/correctness/routines/level1/xamax.cc 1
-rw-r--r-- test/correctness/routines/level1/xasum.cc 1
-rw-r--r-- test/correctness/routines/level1/xaxpy.cc 1
-rw-r--r-- test/correctness/routines/level1/xcopy.cc 1
-rw-r--r-- test/correctness/routines/level1/xdot.cc 1
-rw-r--r-- test/correctness/routines/level1/xnrm2.cc 1
-rw-r--r-- test/correctness/routines/level1/xscal.cc 1
-rw-r--r-- test/correctness/routines/level1/xswap.cc 1
-rw-r--r-- test/correctness/routines/level2/xgbmv.cc 1
-rw-r--r-- test/correctness/routines/level2/xgemv.cc 1
-rw-r--r-- test/correctness/routines/level2/xger.cc 1
-rw-r--r-- test/correctness/routines/level2/xsbmv.cc 1
-rw-r--r-- test/correctness/routines/level2/xspmv.cc 1
-rw-r--r-- test/correctness/routines/level2/xspr.cc 1
-rw-r--r-- test/correctness/routines/level2/xspr2.cc 1
-rw-r--r-- test/correctness/routines/level2/xsymv.cc 1
-rw-r--r-- test/correctness/routines/level2/xsyr.cc 1
-rw-r--r-- test/correctness/routines/level2/xsyr2.cc 1
-rw-r--r-- test/correctness/routines/level2/xtbmv.cc 1
-rw-r--r-- test/correctness/routines/level2/xtpmv.cc 1
-rw-r--r-- test/correctness/routines/level2/xtrmv.cc 1
-rw-r--r-- test/correctness/routines/level3/xgemm.cc 1
-rw-r--r-- test/correctness/routines/level3/xsymm.cc 1
-rw-r--r-- test/correctness/routines/level3/xsyr2k.cc 1
-rw-r--r-- test/correctness/routines/level3/xsyrk.cc 1
-rw-r--r-- test/correctness/routines/level3/xtrmm.cc 1
-rw-r--r-- test/correctness/routines/level3/xtrsm.cc 1
-rw-r--r-- test/correctness/testblas.cc 9
-rw-r--r-- test/correctness/tester.cc 13
-rw-r--r-- test/performance/client.cc 12
-rw-r--r-- test/performance/routines/level1/xamax.cc 3
-rw-r--r-- test/performance/routines/level1/xasum.cc 3
-rw-r--r-- test/performance/routines/level1/xaxpy.cc 3
-rw-r--r-- test/performance/routines/level1/xcopy.cc 3
-rw-r--r-- test/performance/routines/level1/xdot.cc 3
-rw-r--r-- test/performance/routines/level1/xnrm2.cc 3
-rw-r--r-- test/performance/routines/level1/xscal.cc 3
-rw-r--r-- test/performance/routines/level1/xswap.cc 3
-rw-r--r-- test/performance/routines/level2/xgbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xgemv.cc 3
-rw-r--r-- test/performance/routines/level2/xger.cc 3
-rw-r--r-- test/performance/routines/level2/xsbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xspmv.cc 3
-rw-r--r-- test/performance/routines/level2/xspr.cc 3
-rw-r--r-- test/performance/routines/level2/xspr2.cc 3
-rw-r--r-- test/performance/routines/level2/xsymv.cc 3
-rw-r--r-- test/performance/routines/level2/xsyr.cc 3
-rw-r--r-- test/performance/routines/level2/xsyr2.cc 3
-rw-r--r-- test/performance/routines/level2/xtbmv.cc 3
-rw-r--r-- test/performance/routines/level2/xtpmv.cc 3
-rw-r--r-- test/performance/routines/level2/xtrmv.cc 3
-rw-r--r-- test/performance/routines/level3/xgemm.cc 3
-rw-r--r-- test/performance/routines/level3/xsymm.cc 3
-rw-r--r-- test/performance/routines/level3/xsyr2k.cc 3
-rw-r--r-- test/performance/routines/level3/xsyrk.cc 3
-rw-r--r-- test/performance/routines/level3/xtrmm.cc 3
-rw-r--r-- test/performance/routines/level3/xtrsm.cc 3
-rw-r--r-- test/routines/level1/xamax.h 4
-rw-r--r-- test/routines/level1/xasum.h 4
-rw-r--r-- test/routines/level1/xaxpy.h 4
-rw-r--r-- test/routines/level1/xcopy.h 4
-rw-r--r-- test/routines/level1/xdot.h 6
-rw-r--r-- test/routines/level1/xdotc.h 6
-rw-r--r-- test/routines/level1/xdotu.h 6
-rw-r--r-- test/routines/level1/xnrm2.h 4
-rw-r--r-- test/routines/level1/xscal.h 2
-rw-r--r-- test/routines/level1/xswap.h 4
-rw-r--r-- test/routines/level2/xgbmv.h 6
-rw-r--r-- test/routines/level2/xgemv.h 6
-rw-r--r-- test/routines/level2/xger.h 6
-rw-r--r-- test/routines/level2/xgerc.h 6
-rw-r--r-- test/routines/level2/xgeru.h 6
-rw-r--r-- test/routines/level2/xhbmv.h 6
-rw-r--r-- test/routines/level2/xhemv.h 6
-rw-r--r-- test/routines/level2/xher.h 4
-rw-r--r-- test/routines/level2/xher2.h 6
-rw-r--r-- test/routines/level2/xhpmv.h 6
-rw-r--r-- test/routines/level2/xhpr.h 4
-rw-r--r-- test/routines/level2/xhpr2.h 6
-rw-r--r-- test/routines/level2/xsbmv.h 6
-rw-r--r-- test/routines/level2/xspmv.h 6
-rw-r--r-- test/routines/level2/xspr.h 4
-rw-r--r-- test/routines/level2/xspr2.h 6
-rw-r--r-- test/routines/level2/xsymv.h 6
-rw-r--r-- test/routines/level2/xsyr.h 4
-rw-r--r-- test/routines/level2/xsyr2.h 6
-rw-r--r-- test/routines/level2/xtbmv.h 4
-rw-r--r-- test/routines/level2/xtpmv.h 4
-rw-r--r-- test/routines/level2/xtrmv.h 4
-rw-r--r-- test/routines/level3/xgemm.h 6
-rw-r--r-- test/routines/level3/xhemm.h 6
-rw-r--r-- test/routines/level3/xher2k.h 6
-rw-r--r-- test/routines/level3/xherk.h 4
-rw-r--r-- test/routines/level3/xsymm.h 6
-rw-r--r-- test/routines/level3/xsyr2k.h 6
-rw-r--r-- test/routines/level3/xsyrk.h 4
-rw-r--r-- test/routines/level3/xtrmm.h 4
-rw-r--r-- test/wrapper_cblas.h 398
-rw-r--r-- test/wrapper_clblas.h 1961
99 files changed, 1845 insertions, 860 deletions
diff --git a/test/correctness/routines/level1/xamax.cc b/test/correctness/routines/level1/xamax.cc
index ade09e7a..648abaa6 100644
--- a/test/correctness/routines/level1/xamax.cc
+++ b/test/correctness/routines/level1/xamax.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX");
clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX");
clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX");
+ clblast::RunTests<clblast::TestXamax<half>, half, half>(argc, argv, true, "iHAMAX");
return 0;
}
diff --git a/test/correctness/routines/level1/xasum.cc b/test/correctness/routines/level1/xasum.cc
index 5ec20596..d3b036c7 100644
--- a/test/correctness/routines/level1/xasum.cc
+++ b/test/correctness/routines/level1/xasum.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXasum<double>, double, double>(argc, argv, true, "DASUM");
clblast::RunTests<clblast::TestXasum<float2>, float2, float2>(argc, argv, true, "ScASUM");
clblast::RunTests<clblast::TestXasum<double2>, double2, double2>(argc, argv, true, "DzASUM");
+ clblast::RunTests<clblast::TestXasum<half>, half, half>(argc, argv, true, "HASUM");
return 0;
}
diff --git a/test/correctness/routines/level1/xaxpy.cc b/test/correctness/routines/level1/xaxpy.cc
index 746e0001..04f4c128 100644
--- a/test/correctness/routines/level1/xaxpy.cc
+++ b/test/correctness/routines/level1/xaxpy.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXaxpy<double>, double, double>(argc, argv, true, "DAXPY");
clblast::RunTests<clblast::TestXaxpy<float2>, float2, float2>(argc, argv, true, "CAXPY");
clblast::RunTests<clblast::TestXaxpy<double2>, double2, double2>(argc, argv, true, "ZAXPY");
+ clblast::RunTests<clblast::TestXaxpy<half>, half, half>(argc, argv, true, "HAXPY");
return 0;
}
diff --git a/test/correctness/routines/level1/xcopy.cc b/test/correctness/routines/level1/xcopy.cc
index 3e16ffc6..316c6982 100644
--- a/test/correctness/routines/level1/xcopy.cc
+++ b/test/correctness/routines/level1/xcopy.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXcopy<double>, double, double>(argc, argv, true, "DCOPY");
clblast::RunTests<clblast::TestXcopy<float2>, float2, float2>(argc, argv, true, "CCOPY");
clblast::RunTests<clblast::TestXcopy<double2>, double2, double2>(argc, argv, true, "ZCOPY");
+ clblast::RunTests<clblast::TestXcopy<half>, half, half>(argc, argv, true, "HCOPY");
return 0;
}
diff --git a/test/correctness/routines/level1/xdot.cc b/test/correctness/routines/level1/xdot.cc
index 5ea105e0..72dc9d5e 100644
--- a/test/correctness/routines/level1/xdot.cc
+++ b/test/correctness/routines/level1/xdot.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXdot<float>, float, float>(argc, argv, false, "SDOT");
clblast::RunTests<clblast::TestXdot<double>, double, double>(argc, argv, true, "DDOT");
+ clblast::RunTests<clblast::TestXdot<half>, half, half>(argc, argv, true, "HDOT");
return 0;
}
diff --git a/test/correctness/routines/level1/xnrm2.cc b/test/correctness/routines/level1/xnrm2.cc
index 97fb0ad6..0fe8dc33 100644
--- a/test/correctness/routines/level1/xnrm2.cc
+++ b/test/correctness/routines/level1/xnrm2.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXnrm2<double>, double, double>(argc, argv, true, "DNRM2");
clblast::RunTests<clblast::TestXnrm2<float2>, float2, float2>(argc, argv, true, "ScNRM2");
clblast::RunTests<clblast::TestXnrm2<double2>, double2, double2>(argc, argv, true, "DzNRM2");
+ clblast::RunTests<clblast::TestXnrm2<half>, half, half>(argc, argv, true, "HNRM2");
return 0;
}
diff --git a/test/correctness/routines/level1/xscal.cc b/test/correctness/routines/level1/xscal.cc
index 4d138fad..9146e5ce 100644
--- a/test/correctness/routines/level1/xscal.cc
+++ b/test/correctness/routines/level1/xscal.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXscal<double>, double, double>(argc, argv, true, "DSCAL");
clblast::RunTests<clblast::TestXscal<float2>, float2, float2>(argc, argv, true, "CSCAL");
clblast::RunTests<clblast::TestXscal<double2>, double2, double2>(argc, argv, true, "ZSCAL");
+ clblast::RunTests<clblast::TestXscal<half>, half, half>(argc, argv, true, "HSCAL");
return 0;
}
diff --git a/test/correctness/routines/level1/xswap.cc b/test/correctness/routines/level1/xswap.cc
index 38f110f7..636a5b0f 100644
--- a/test/correctness/routines/level1/xswap.cc
+++ b/test/correctness/routines/level1/xswap.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXswap<double>, double, double>(argc, argv, true, "DSWAP");
clblast::RunTests<clblast::TestXswap<float2>, float2, float2>(argc, argv, true, "CSWAP");
clblast::RunTests<clblast::TestXswap<double2>, double2, double2>(argc, argv, true, "ZSWAP");
+ clblast::RunTests<clblast::TestXswap<half>, half, half>(argc, argv, true, "HSWAP");
return 0;
}
diff --git a/test/correctness/routines/level2/xgbmv.cc b/test/correctness/routines/level2/xgbmv.cc
index b28c5978..528a3325 100644
--- a/test/correctness/routines/level2/xgbmv.cc
+++ b/test/correctness/routines/level2/xgbmv.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXgbmv<double>, double, double>(argc, argv, true, "DGBMV");
clblast::RunTests<clblast::TestXgbmv<float2>, float2, float2>(argc, argv, true, "CGBMV");
clblast::RunTests<clblast::TestXgbmv<double2>, double2, double2>(argc, argv, true, "ZGBMV");
+ clblast::RunTests<clblast::TestXgbmv<half>, half, half>(argc, argv, true, "HGBMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xgemv.cc b/test/correctness/routines/level2/xgemv.cc
index 14eb74d1..fc1cf3eb 100644
--- a/test/correctness/routines/level2/xgemv.cc
+++ b/test/correctness/routines/level2/xgemv.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXgemv<double>, double, double>(argc, argv, true, "DGEMV");
clblast::RunTests<clblast::TestXgemv<float2>, float2, float2>(argc, argv, true, "CGEMV");
clblast::RunTests<clblast::TestXgemv<double2>, double2, double2>(argc, argv, true, "ZGEMV");
+ clblast::RunTests<clblast::TestXgemv<half>, half, half>(argc, argv, true, "HGEMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xger.cc b/test/correctness/routines/level2/xger.cc
index c37a5c41..c3c33ae6 100644
--- a/test/correctness/routines/level2/xger.cc
+++ b/test/correctness/routines/level2/xger.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXger<float>, float, float>(argc, argv, false, "SGER");
clblast::RunTests<clblast::TestXger<double>, double, double>(argc, argv, true, "DGER");
+ clblast::RunTests<clblast::TestXger<half>, half, half>(argc, argv, true, "HGER");
return 0;
}
diff --git a/test/correctness/routines/level2/xsbmv.cc b/test/correctness/routines/level2/xsbmv.cc
index 212e2c3a..c2effcc2 100644
--- a/test/correctness/routines/level2/xsbmv.cc
+++ b/test/correctness/routines/level2/xsbmv.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsbmv<float>, float, float>(argc, argv, false, "SSBMV");
clblast::RunTests<clblast::TestXsbmv<double>, double, double>(argc, argv, true, "DSBMV");
+ clblast::RunTests<clblast::TestXsbmv<half>, half, half>(argc, argv, true, "HSBMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xspmv.cc b/test/correctness/routines/level2/xspmv.cc
index dc833024..4142636d 100644
--- a/test/correctness/routines/level2/xspmv.cc
+++ b/test/correctness/routines/level2/xspmv.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXspmv<float>, float, float>(argc, argv, false, "SSPMV");
clblast::RunTests<clblast::TestXspmv<double>, double, double>(argc, argv, true, "DSPMV");
+ clblast::RunTests<clblast::TestXspmv<half>, half, half>(argc, argv, true, "HSPMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xspr.cc b/test/correctness/routines/level2/xspr.cc
index a0104dd4..c068b448 100644
--- a/test/correctness/routines/level2/xspr.cc
+++ b/test/correctness/routines/level2/xspr.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXspr<float>, float, float>(argc, argv, false, "SSPR");
clblast::RunTests<clblast::TestXspr<double>, double, double>(argc, argv, true, "DSPR");
+ clblast::RunTests<clblast::TestXspr<half>, half, half>(argc, argv, true, "HSPR");
return 0;
}
diff --git a/test/correctness/routines/level2/xspr2.cc b/test/correctness/routines/level2/xspr2.cc
index 5fe5827f..904870d5 100644
--- a/test/correctness/routines/level2/xspr2.cc
+++ b/test/correctness/routines/level2/xspr2.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXspr2<float>, float, float>(argc, argv, false, "SSPR2");
clblast::RunTests<clblast::TestXspr2<double>, double, double>(argc, argv, true, "DSPR2");
+ clblast::RunTests<clblast::TestXspr2<half>, half, half>(argc, argv, true, "HSPR2");
return 0;
}
diff --git a/test/correctness/routines/level2/xsymv.cc b/test/correctness/routines/level2/xsymv.cc
index 6224739f..eb9b6eb7 100644
--- a/test/correctness/routines/level2/xsymv.cc
+++ b/test/correctness/routines/level2/xsymv.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsymv<float>, float, float>(argc, argv, false, "SSYMV");
clblast::RunTests<clblast::TestXsymv<double>, double, double>(argc, argv, true, "DSYMV");
+ clblast::RunTests<clblast::TestXsymv<half>, half, half>(argc, argv, true, "HSYMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xsyr.cc b/test/correctness/routines/level2/xsyr.cc
index a47b918f..eccf95e0 100644
--- a/test/correctness/routines/level2/xsyr.cc
+++ b/test/correctness/routines/level2/xsyr.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsyr<float>, float, float>(argc, argv, false, "SSYR");
clblast::RunTests<clblast::TestXsyr<double>, double, double>(argc, argv, true, "DSYR");
+ clblast::RunTests<clblast::TestXsyr<half>, half, half>(argc, argv, true, "HSYR");
return 0;
}
diff --git a/test/correctness/routines/level2/xsyr2.cc b/test/correctness/routines/level2/xsyr2.cc
index 1743632c..46c939d2 100644
--- a/test/correctness/routines/level2/xsyr2.cc
+++ b/test/correctness/routines/level2/xsyr2.cc
@@ -20,6 +20,7 @@ using double2 = clblast::double2;
int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsyr2<float>, float, float>(argc, argv, false, "SSYR2");
clblast::RunTests<clblast::TestXsyr2<double>, double, double>(argc, argv, true, "DSYR2");
+ clblast::RunTests<clblast::TestXsyr2<half>, half, half>(argc, argv, true, "HSYR2");
return 0;
}
diff --git a/test/correctness/routines/level2/xtbmv.cc b/test/correctness/routines/level2/xtbmv.cc
index d3bbbade..252abdc4 100644
--- a/test/correctness/routines/level2/xtbmv.cc
+++ b/test/correctness/routines/level2/xtbmv.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXtbmv<double>, double, double>(argc, argv, true, "DTBMV");
clblast::RunTests<clblast::TestXtbmv<float2>, float2, float2>(argc, argv, true, "CTBMV");
clblast::RunTests<clblast::TestXtbmv<double2>, double2, double2>(argc, argv, true, "ZTBMV");
+ clblast::RunTests<clblast::TestXtbmv<half>, half, half>(argc, argv, true, "HTBMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xtpmv.cc b/test/correctness/routines/level2/xtpmv.cc
index 95489a65..b8776faa 100644
--- a/test/correctness/routines/level2/xtpmv.cc
+++ b/test/correctness/routines/level2/xtpmv.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXtpmv<double>, double, double>(argc, argv, true, "DTPMV");
clblast::RunTests<clblast::TestXtpmv<float2>, float2, float2>(argc, argv, true, "CTPMV");
clblast::RunTests<clblast::TestXtpmv<double2>, double2, double2>(argc, argv, true, "ZTPMV");
+ clblast::RunTests<clblast::TestXtpmv<half>, half, half>(argc, argv, true, "HTPMV");
return 0;
}
diff --git a/test/correctness/routines/level2/xtrmv.cc b/test/correctness/routines/level2/xtrmv.cc
index ca50af88..256fe900 100644
--- a/test/correctness/routines/level2/xtrmv.cc
+++ b/test/correctness/routines/level2/xtrmv.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXtrmv<double>, double, double>(argc, argv, true, "DTRMV");
clblast::RunTests<clblast::TestXtrmv<float2>, float2, float2>(argc, argv, true, "CTRMV");
clblast::RunTests<clblast::TestXtrmv<double2>, double2, double2>(argc, argv, true, "ZTRMV");
+ clblast::RunTests<clblast::TestXtrmv<half>, half, half>(argc, argv, true, "HTRMV");
return 0;
}
diff --git a/test/correctness/routines/level3/xgemm.cc b/test/correctness/routines/level3/xgemm.cc
index 632724ed..f8c8a891 100644
--- a/test/correctness/routines/level3/xgemm.cc
+++ b/test/correctness/routines/level3/xgemm.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXgemm<double>, double, double>(argc, argv, true, "DGEMM");
clblast::RunTests<clblast::TestXgemm<float2>, float2, float2>(argc, argv, true, "CGEMM");
clblast::RunTests<clblast::TestXgemm<double2>, double2, double2>(argc, argv, true, "ZGEMM");
+ clblast::RunTests<clblast::TestXgemm<half>, half, half>(argc, argv, true, "HGEMM");
return 0;
}
diff --git a/test/correctness/routines/level3/xsymm.cc b/test/correctness/routines/level3/xsymm.cc
index 046fca16..c29f03dd 100644
--- a/test/correctness/routines/level3/xsymm.cc
+++ b/test/correctness/routines/level3/xsymm.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsymm<double>, double, double>(argc, argv, true, "DSYMM");
clblast::RunTests<clblast::TestXsymm<float2>, float2, float2>(argc, argv, true, "CSYMM");
clblast::RunTests<clblast::TestXsymm<double2>, double2, double2>(argc, argv, true, "ZSYMM");
+ clblast::RunTests<clblast::TestXsymm<half>, half, half>(argc, argv, true, "HSYMM");
return 0;
}
diff --git a/test/correctness/routines/level3/xsyr2k.cc b/test/correctness/routines/level3/xsyr2k.cc
index db2b83d9..9f9c87d8 100644
--- a/test/correctness/routines/level3/xsyr2k.cc
+++ b/test/correctness/routines/level3/xsyr2k.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsyr2k<double>, double, double>(argc, argv, true, "DSYR2K");
clblast::RunTests<clblast::TestXsyr2k<float2>, float2, float2>(argc, argv, true, "CSYR2K");
clblast::RunTests<clblast::TestXsyr2k<double2>, double2, double2>(argc, argv, true, "ZSYR2K");
+ clblast::RunTests<clblast::TestXsyr2k<half>, half, half>(argc, argv, true, "HSYR2K");
return 0;
}
diff --git a/test/correctness/routines/level3/xsyrk.cc b/test/correctness/routines/level3/xsyrk.cc
index 3dad3535..12343074 100644
--- a/test/correctness/routines/level3/xsyrk.cc
+++ b/test/correctness/routines/level3/xsyrk.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXsyrk<double>, double, double>(argc, argv, true, "DSYRK");
clblast::RunTests<clblast::TestXsyrk<float2>, float2, float2>(argc, argv, true, "CSYRK");
clblast::RunTests<clblast::TestXsyrk<double2>, double2, double2>(argc, argv, true, "ZSYRK");
+ clblast::RunTests<clblast::TestXsyrk<half>, half, half>(argc, argv, true, "HSYRK");
return 0;
}
diff --git a/test/correctness/routines/level3/xtrmm.cc b/test/correctness/routines/level3/xtrmm.cc
index 2d843e3e..aca73f0d 100644
--- a/test/correctness/routines/level3/xtrmm.cc
+++ b/test/correctness/routines/level3/xtrmm.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXtrmm<double>, double, double>(argc, argv, true, "DTRMM");
clblast::RunTests<clblast::TestXtrmm<float2>, float2, float2>(argc, argv, true, "CTRMM");
clblast::RunTests<clblast::TestXtrmm<double2>, double2, double2>(argc, argv, true, "ZTRMM");
+ clblast::RunTests<clblast::TestXtrmm<half>, half, half>(argc, argv, true, "HTRMM");
return 0;
}
diff --git a/test/correctness/routines/level3/xtrsm.cc b/test/correctness/routines/level3/xtrsm.cc
index b5f5045e..b050269a 100644
--- a/test/correctness/routines/level3/xtrsm.cc
+++ b/test/correctness/routines/level3/xtrsm.cc
@@ -22,6 +22,7 @@ int main(int argc, char *argv[]) {
clblast::RunTests<clblast::TestXtrsm<double>, double, double>(argc, argv, true, "DTRSM");
clblast::RunTests<clblast::TestXtrsm<float2>, float2, float2>(argc, argv, true, "CTRSM");
clblast::RunTests<clblast::TestXtrsm<double2>, double2, double2>(argc, argv, true, "ZTRSM");
+ clblast::RunTests<clblast::TestXtrsm<half>, half, half>(argc, argv, true, "HTRSM");
return 0;
}
diff --git a/test/correctness/testblas.cc b/test/correctness/testblas.cc
index e70c0361..50871402 100644
--- a/test/correctness/testblas.cc
+++ b/test/correctness/testblas.cc
@@ -20,6 +20,7 @@ namespace clblast {
// =================================================================================================
// The transpose-options to test with (data-type dependent)
+template <> const std::vector<Transpose> TestBlas<half,half>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> TestBlas<float,float>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> TestBlas<double,double>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> TestBlas<float2,float2>::kTransposes = {Transpose::kNo, Transpose::kYes, Transpose::kConjugate};
@@ -147,10 +148,8 @@ void TestBlas<T,U>::TestRegular(std::vector<Arguments<U>> &test_vector, const st
if (verbose_) {
if (get_id2_(args) == 1) { fprintf(stdout, "\n Error at index %zu: ", id1); }
else { fprintf(stdout, "\n Error at %zu,%zu: ", id1, id2); }
- std::cout << result1[index];
- fprintf(stdout, " (reference) versus ");
- std::cout << result2[index];
- fprintf(stdout, " (CLBlast)");
+ fprintf(stdout, " %s (reference) versus ", ToString(result1[index]).c_str());
+ fprintf(stdout, " %s (CLBlast)", ToString(result2[index]).c_str());
}
}
}
@@ -171,6 +170,7 @@ template <typename T, typename U>
void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const std::string &name) {
if (!PrecisionSupported<T>(device_)) { return; }
if (!compare_clblas_) { return; }
+ if (std::is_same<T, half>::value) { return; }
TestStart("invalid buffer sizes", name);
// Iterates over all the to-be-tested combinations of arguments
@@ -222,6 +222,7 @@ void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const st
// =================================================================================================
// Compiles the templated class
+template class TestBlas<half, half>;
template class TestBlas<float, float>;
template class TestBlas<double, double>;
template class TestBlas<float2, float2>;
diff --git a/test/correctness/tester.cc b/test/correctness/tester.cc
index 85ae7091..5b603585 100644
--- a/test/correctness/tester.cc
+++ b/test/correctness/tester.cc
@@ -351,11 +351,11 @@ bool TestSimilarity(const T val1, const T val2) {
}
}
-// Compiles the default case for non-complex data-types
+// Compiles the default case for standard data-types
template bool TestSimilarity<float>(const float, const float);
template bool TestSimilarity<double>(const double, const double);
-// Specialisations for complex data-types
+// Specialisations for non-standard data-types
template <>
bool TestSimilarity(const float2 val1, const float2 val2) {
auto real = TestSimilarity(val1.real(), val2.real());
@@ -368,6 +368,10 @@ bool TestSimilarity(const double2 val1, const double2 val2) {
auto imag = TestSimilarity(val1.imag(), val2.imag());
return (real && imag);
}
+template <>
+bool TestSimilarity(const half val1, const half val2) {
+ return TestSimilarity(HalfToFloat(val1), HalfToFloat(val2));
+}
// =================================================================================================
@@ -389,10 +393,15 @@ template <> const std::vector<double2> GetExampleScalars(const bool full_test) {
if (full_test) { return {{0.0, 0.0}, {1.0, 1.3}, {2.42, 3.14}}; }
else { return {{2.42, 3.14}}; }
}
+template <> const std::vector<half> GetExampleScalars(const bool full_test) {
+ if (full_test) { return {FloatToHalf(0.0f), FloatToHalf(1.0f), FloatToHalf(3.14f)}; }
+ else { return {FloatToHalf(3.14f)}; }
+}
// =================================================================================================
// Compiles the templated class
+template class Tester<half, half>;
template class Tester<float, float>;
template class Tester<double, double>;
template class Tester<float2, float2>;
diff --git a/test/performance/client.cc b/test/performance/client.cc
index 9aaf1e4e..5a7226df 100644
--- a/test/performance/client.cc
+++ b/test/performance/client.cc
@@ -116,6 +116,17 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const GetMetric
// which is thus always displayed (unless silence is specified).
if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); }
+ // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
+ if (args.precision == Precision::kHalf) {
+ if (args.compare_clblas != 0 || args.compare_cblas != 0) {
+ if (!args.silent) {
+ fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
+ }
+ }
+ args.compare_clblas = 0;
+ args.compare_cblas = 0;
+ }
+
// Returns the arguments
return args;
}
@@ -339,6 +350,7 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
// =================================================================================================
// Compiles the templated class
+template class Client<half,half>;
template class Client<float,float>;
template class Client<double,double>;
template class Client<float2,float2>;
diff --git a/test/performance/routines/level1/xamax.cc b/test/performance/routines/level1/xamax.cc
index 85caa483..4af1f1c0 100644
--- a/test/performance/routines/level1/xamax.cc
+++ b/test/performance/routines/level1/xamax.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXamax<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xasum.cc b/test/performance/routines/level1/xasum.cc
index 2680966e..8e098890 100644
--- a/test/performance/routines/level1/xasum.cc
+++ b/test/performance/routines/level1/xasum.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXasum<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXasum<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xaxpy.cc b/test/performance/routines/level1/xaxpy.cc
index b423bc3a..b48c290d 100644
--- a/test/performance/routines/level1/xaxpy.cc
+++ b/test/performance/routines/level1/xaxpy.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXaxpy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXaxpy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xcopy.cc b/test/performance/routines/level1/xcopy.cc
index c04c6c1c..b7c60f0f 100644
--- a/test/performance/routines/level1/xcopy.cc
+++ b/test/performance/routines/level1/xcopy.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXcopy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXcopy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xdot.cc b/test/performance/routines/level1/xdot.cc
index f4616464..3edf2590 100644
--- a/test/performance/routines/level1/xdot.cc
+++ b/test/performance/routines/level1/xdot.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXdot<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXdot<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xnrm2.cc b/test/performance/routines/level1/xnrm2.cc
index db6ec9ad..f167df95 100644
--- a/test/performance/routines/level1/xnrm2.cc
+++ b/test/performance/routines/level1/xnrm2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXnrm2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xscal.cc b/test/performance/routines/level1/xscal.cc
index bd38f43e..35e21ba8 100644
--- a/test/performance/routines/level1/xscal.cc
+++ b/test/performance/routines/level1/xscal.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXscal<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXscal<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level1/xswap.cc b/test/performance/routines/level1/xswap.cc
index 112641d3..4791d4c3 100644
--- a/test/performance/routines/level1/xswap.cc
+++ b/test/performance/routines/level1/xswap.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXswap<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXswap<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xgbmv.cc b/test/performance/routines/level2/xgbmv.cc
index b050184d..be4056de 100644
--- a/test/performance/routines/level2/xgbmv.cc
+++ b/test/performance/routines/level2/xgbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xgemv.cc b/test/performance/routines/level2/xgemv.cc
index 51ab9a10..50e6225a 100644
--- a/test/performance/routines/level2/xgemv.cc
+++ b/test/performance/routines/level2/xgemv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgemv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xger.cc b/test/performance/routines/level2/xger.cc
index 2d956346..b1b5a268 100644
--- a/test/performance/routines/level2/xger.cc
+++ b/test/performance/routines/level2/xger.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXger<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXger<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsbmv.cc b/test/performance/routines/level2/xsbmv.cc
index eabab3b7..5fb6e8c0 100644
--- a/test/performance/routines/level2/xsbmv.cc
+++ b/test/performance/routines/level2/xsbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspmv.cc b/test/performance/routines/level2/xspmv.cc
index 2a9ef925..e0ee2075 100644
--- a/test/performance/routines/level2/xspmv.cc
+++ b/test/performance/routines/level2/xspmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspr.cc b/test/performance/routines/level2/xspr.cc
index 84331d74..19651679 100644
--- a/test/performance/routines/level2/xspr.cc
+++ b/test/performance/routines/level2/xspr.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xspr2.cc b/test/performance/routines/level2/xspr2.cc
index c42009a1..8745c004 100644
--- a/test/performance/routines/level2/xspr2.cc
+++ b/test/performance/routines/level2/xspr2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXspr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsymv.cc b/test/performance/routines/level2/xsymv.cc
index 3f72fe77..42de1ed5 100644
--- a/test/performance/routines/level2/xsymv.cc
+++ b/test/performance/routines/level2/xsymv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsymv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsyr.cc b/test/performance/routines/level2/xsyr.cc
index 6b31d3a9..310bfb5e 100644
--- a/test/performance/routines/level2/xsyr.cc
+++ b/test/performance/routines/level2/xsyr.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xsyr2.cc b/test/performance/routines/level2/xsyr2.cc
index 0ad59d2d..bbeed3db 100644
--- a/test/performance/routines/level2/xsyr2.cc
+++ b/test/performance/routines/level2/xsyr2.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtbmv.cc b/test/performance/routines/level2/xtbmv.cc
index a3297f34..24eec61f 100644
--- a/test/performance/routines/level2/xtbmv.cc
+++ b/test/performance/routines/level2/xtbmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtpmv.cc b/test/performance/routines/level2/xtpmv.cc
index 72477f2d..2f2487f8 100644
--- a/test/performance/routines/level2/xtpmv.cc
+++ b/test/performance/routines/level2/xtpmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtpmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtpmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level2/xtrmv.cc b/test/performance/routines/level2/xtrmv.cc
index 894a7952..3f23afd1 100644
--- a/test/performance/routines/level2/xtrmv.cc
+++ b/test/performance/routines/level2/xtrmv.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xgemm.cc b/test/performance/routines/level3/xgemm.cc
index 91897ee1..8e48dc3a 100644
--- a/test/performance/routines/level3/xgemm.cc
+++ b/test/performance/routines/level3/xgemm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXgemm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsymm.cc b/test/performance/routines/level3/xsymm.cc
index e0feadd1..7eac5537 100644
--- a/test/performance/routines/level3/xsymm.cc
+++ b/test/performance/routines/level3/xsymm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsymm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsyr2k.cc b/test/performance/routines/level3/xsyr2k.cc
index 4a82ddc4..49d00f34 100644
--- a/test/performance/routines/level3/xsyr2k.cc
+++ b/test/performance/routines/level3/xsyr2k.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyr2k<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2k<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xsyrk.cc b/test/performance/routines/level3/xsyrk.cc
index 70f61322..ad0a06b4 100644
--- a/test/performance/routines/level3/xsyrk.cc
+++ b/test/performance/routines/level3/xsyrk.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXsyrk<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyrk<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xtrmm.cc b/test/performance/routines/level3/xtrmm.cc
index 6f6041e4..92526844 100644
--- a/test/performance/routines/level3/xtrmm.cc
+++ b/test/performance/routines/level3/xtrmm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrmm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/performance/routines/level3/xtrsm.cc b/test/performance/routines/level3/xtrsm.cc
index 76ef255a..08e4b4a9 100644
--- a/test/performance/routines/level3/xtrsm.cc
+++ b/test/performance/routines/level3/xtrsm.cc
@@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
- case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+ case clblast::Precision::kHalf:
+ clblast::RunClient<clblast::TestXtrsm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrsm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:
diff --git a/test/routines/level1/xamax.h b/test/routines/level1/xamax.h
index 7b404dc3..12b031bc 100644
--- a/test/routines/level1/xamax.h
+++ b/test/routines/level1/xamax.h
@@ -86,8 +86,8 @@ class TestXamax {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXamax<T>(args.n,
- buffers.scalar(), args.imax_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.imax_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xasum.h b/test/routines/level1/xasum.h
index 6eae3c83..eb83817b 100644
--- a/test/routines/level1/xasum.h
+++ b/test/routines/level1/xasum.h
@@ -86,8 +86,8 @@ class TestXasum {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXasum<T>(args.n,
- buffers.scalar(), args.asum_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.asum_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xaxpy.h b/test/routines/level1/xaxpy.h
index 8f72f570..c241da91 100644
--- a/test/routines/level1/xaxpy.h
+++ b/test/routines/level1/xaxpy.h
@@ -87,8 +87,8 @@ class TestXaxpy {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xcopy.h b/test/routines/level1/xcopy.h
index 0527ca6a..a1ff06ce 100644
--- a/test/routines/level1/xcopy.h
+++ b/test/routines/level1/xcopy.h
@@ -86,8 +86,8 @@ class TestXcopy {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXcopy<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdot.h b/test/routines/level1/xdot.h
index d1c34c0f..0bbc93d5 100644
--- a/test/routines/level1/xdot.h
+++ b/test/routines/level1/xdot.h
@@ -91,9 +91,9 @@ class TestXdot {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdot<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdotc.h b/test/routines/level1/xdotc.h
index a2742cb0..e1cc1854 100644
--- a/test/routines/level1/xdotc.h
+++ b/test/routines/level1/xdotc.h
@@ -91,9 +91,9 @@ class TestXdotc {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotc<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdotu.h b/test/routines/level1/xdotu.h
index 06ce979e..558257cc 100644
--- a/test/routines/level1/xdotu.h
+++ b/test/routines/level1/xdotu.h
@@ -91,9 +91,9 @@ class TestXdotu {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotu<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xnrm2.h b/test/routines/level1/xnrm2.h
index d8a0de4e..19074ca2 100644
--- a/test/routines/level1/xnrm2.h
+++ b/test/routines/level1/xnrm2.h
@@ -86,8 +86,8 @@ class TestXnrm2 {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXnrm2<T>(args.n,
- buffers.scalar(), args.nrm2_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.nrm2_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xscal.h b/test/routines/level1/xscal.h
index 35855dbd..84d14ac7 100644
--- a/test/routines/level1/xscal.h
+++ b/test/routines/level1/xscal.h
@@ -82,7 +82,7 @@ class TestXscal {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXscal(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xswap.h b/test/routines/level1/xswap.h
index ae69d3be..e870b602 100644
--- a/test/routines/level1/xswap.h
+++ b/test/routines/level1/xswap.h
@@ -86,8 +86,8 @@ class TestXswap {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXswap<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgbmv.h b/test/routines/level2/xgbmv.h
index c88cdf2a..c777ff73 100644
--- a/test/routines/level2/xgbmv.h
+++ b/test/routines/level2/xgbmv.h
@@ -102,9 +102,9 @@ class TestXgbmv {
auto status = clblasXgbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.kl, args.ku, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h
index cf63d55f..f8a7e1d0 100644
--- a/test/routines/level2/xgemv.h
+++ b/test/routines/level2/xgemv.h
@@ -102,9 +102,9 @@ class TestXgemv {
auto status = clblasXgemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xger.h b/test/routines/level2/xger.h
index ae142e2e..e0d1fe49 100644
--- a/test/routines/level2/xger.h
+++ b/test/routines/level2/xger.h
@@ -97,9 +97,9 @@ class TestXger {
auto event = cl_event{};
auto status = clblasXger(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgerc.h b/test/routines/level2/xgerc.h
index b236aef6..7449146b 100644
--- a/test/routines/level2/xgerc.h
+++ b/test/routines/level2/xgerc.h
@@ -97,9 +97,9 @@ class TestXgerc {
auto event = cl_event{};
auto status = clblasXgerc(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgeru.h b/test/routines/level2/xgeru.h
index 3d3fa439..07837657 100644
--- a/test/routines/level2/xgeru.h
+++ b/test/routines/level2/xgeru.h
@@ -97,9 +97,9 @@ class TestXgeru {
auto event = cl_event{};
auto status = clblasXgeru(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhbmv.h b/test/routines/level2/xhbmv.h
index 4098639a..73194975 100644
--- a/test/routines/level2/xhbmv.h
+++ b/test/routines/level2/xhbmv.h
@@ -96,9 +96,9 @@ class TestXhbmv {
auto status = clblasXhbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhemv.h b/test/routines/level2/xhemv.h
index 5652872d..aabbf14a 100644
--- a/test/routines/level2/xhemv.h
+++ b/test/routines/level2/xhemv.h
@@ -96,9 +96,9 @@ class TestXhemv {
auto status = clblasXhemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xher.h b/test/routines/level2/xher.h
index 3bbf0887..1294832c 100644
--- a/test/routines/level2/xher.h
+++ b/test/routines/level2/xher.h
@@ -91,8 +91,8 @@ class TestXher {
auto status = clblasXher(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xher2.h b/test/routines/level2/xher2.h
index dc7fbe73..5e90174d 100644
--- a/test/routines/level2/xher2.h
+++ b/test/routines/level2/xher2.h
@@ -96,9 +96,9 @@ class TestXher2 {
auto status = clblasXher2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpmv.h b/test/routines/level2/xhpmv.h
index df5a90ee..8face6b6 100644
--- a/test/routines/level2/xhpmv.h
+++ b/test/routines/level2/xhpmv.h
@@ -96,9 +96,9 @@ class TestXhpmv {
auto status = clblasXhpmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpr.h b/test/routines/level2/xhpr.h
index 0db11db0..63cab31f 100644
--- a/test/routines/level2/xhpr.h
+++ b/test/routines/level2/xhpr.h
@@ -91,8 +91,8 @@ class TestXhpr {
auto status = clblasXhpr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpr2.h b/test/routines/level2/xhpr2.h
index e1e5b4c5..64d205a0 100644
--- a/test/routines/level2/xhpr2.h
+++ b/test/routines/level2/xhpr2.h
@@ -96,9 +96,9 @@ class TestXhpr2 {
auto status = clblasXhpr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsbmv.h b/test/routines/level2/xsbmv.h
index fce88f4c..3f1446c8 100644
--- a/test/routines/level2/xsbmv.h
+++ b/test/routines/level2/xsbmv.h
@@ -96,9 +96,9 @@ class TestXsbmv {
auto status = clblasXsbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspmv.h b/test/routines/level2/xspmv.h
index 2fdba77a..2add3cdd 100644
--- a/test/routines/level2/xspmv.h
+++ b/test/routines/level2/xspmv.h
@@ -96,9 +96,9 @@ class TestXspmv {
auto status = clblasXspmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspr.h b/test/routines/level2/xspr.h
index dcacc5de..ad21bdf6 100644
--- a/test/routines/level2/xspr.h
+++ b/test/routines/level2/xspr.h
@@ -91,8 +91,8 @@ class TestXspr {
auto status = clblasXspr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspr2.h b/test/routines/level2/xspr2.h
index 69fda2fb..c55e8181 100644
--- a/test/routines/level2/xspr2.h
+++ b/test/routines/level2/xspr2.h
@@ -96,9 +96,9 @@ class TestXspr2 {
auto status = clblasXspr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsymv.h b/test/routines/level2/xsymv.h
index 16f94d6f..b6583a24 100644
--- a/test/routines/level2/xsymv.h
+++ b/test/routines/level2/xsymv.h
@@ -96,9 +96,9 @@ class TestXsymv {
auto status = clblasXsymv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsyr.h b/test/routines/level2/xsyr.h
index a66dd271..f3929588 100644
--- a/test/routines/level2/xsyr.h
+++ b/test/routines/level2/xsyr.h
@@ -91,8 +91,8 @@ class TestXsyr {
auto status = clblasXsyr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsyr2.h b/test/routines/level2/xsyr2.h
index a36815e5..8cdb6a14 100644
--- a/test/routines/level2/xsyr2.h
+++ b/test/routines/level2/xsyr2.h
@@ -96,9 +96,9 @@ class TestXsyr2 {
auto status = clblasXsyr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtbmv.h b/test/routines/level2/xtbmv.h
index 1425b60b..9c4131ec 100644
--- a/test/routines/level2/xtbmv.h
+++ b/test/routines/level2/xtbmv.h
@@ -92,8 +92,8 @@ class TestXtbmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n, args.kl,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtpmv.h b/test/routines/level2/xtpmv.h
index a834b437..58249227 100644
--- a/test/routines/level2/xtpmv.h
+++ b/test/routines/level2/xtpmv.h
@@ -92,8 +92,8 @@ class TestXtpmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtrmv.h b/test/routines/level2/xtrmv.h
index cd502d5d..635a1319 100644
--- a/test/routines/level2/xtrmv.h
+++ b/test/routines/level2/xtrmv.h
@@ -92,8 +92,8 @@ class TestXtrmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xgemm.h b/test/routines/level3/xgemm.h
index cd5c2acd..842dae93 100644
--- a/test/routines/level3/xgemm.h
+++ b/test/routines/level3/xgemm.h
@@ -105,9 +105,9 @@ class TestXgemm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.b_transpose),
args.m, args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xhemm.h b/test/routines/level3/xhemm.h
index edc71024..106b99ff 100644
--- a/test/routines/level3/xhemm.h
+++ b/test/routines/level3/xhemm.h
@@ -105,9 +105,9 @@ class TestXhemm {
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xher2k.h b/test/routines/level3/xher2k.h
index a78e1293..e2f4448f 100644
--- a/test/routines/level3/xher2k.h
+++ b/test/routines/level3/xher2k.h
@@ -105,9 +105,9 @@ class TestXher2k {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, alpha2,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xherk.h b/test/routines/level3/xherk.h
index 245293d6..43d7cfcd 100644
--- a/test/routines/level3/xherk.h
+++ b/test/routines/level3/xherk.h
@@ -95,8 +95,8 @@ class TestXherk {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsymm.h b/test/routines/level3/xsymm.h
index e638b735..c32b4cf7 100644
--- a/test/routines/level3/xsymm.h
+++ b/test/routines/level3/xsymm.h
@@ -105,9 +105,9 @@ class TestXsymm {
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsyr2k.h b/test/routines/level3/xsyr2k.h
index abac20f4..57c3c203 100644
--- a/test/routines/level3/xsyr2k.h
+++ b/test/routines/level3/xsyr2k.h
@@ -103,9 +103,9 @@ class TestXsyr2k {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsyrk.h b/test/routines/level3/xsyrk.h
index 8a5fcb5f..6c3a3786 100644
--- a/test/routines/level3/xsyrk.h
+++ b/test/routines/level3/xsyrk.h
@@ -95,8 +95,8 @@ class TestXsyrk {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xtrmm.h b/test/routines/level3/xtrmm.h
index 7c9c21bc..3eb63030 100644
--- a/test/routines/level3/xtrmm.h
+++ b/test/routines/level3/xtrmm.h
@@ -97,8 +97,8 @@ class TestXtrmm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h
index 529acfbf..bf59aa94 100644
--- a/test/wrapper_cblas.h
+++ b/test/wrapper_cblas.h
@@ -161,6 +161,17 @@ void cblasXswap(const size_t n,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXswap(const size_t n, // half overload: runs the routine on converted copies of both vectors
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ cblasXswap(n, // resolves to another cblasXswap overload through the *_bis argument types
+ x_buffer_bis, x_offset, x_inc, // offsets and increments are passed through unchanged
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis); // both vectors are modified by a swap, so both are converted back
+ FloatToHalfBuffer(y_buffer, y_buffer_bis);
+}
// Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
void cblasXscal(const size_t n,
@@ -193,6 +204,15 @@ void cblasXscal(const size_t n,
alpha_array.data(),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXscal(const size_t n, // half overload: scales a converted copy of x, then converts it back
+ const half alpha,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ cblasXscal(n, // resolves to another cblasXscal overload through the converted argument types
+ HalfToFloat(alpha), // scalar is converted alongside the buffer
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis); // x is the in/out operand
+}
// Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
void cblasXcopy(const size_t n,
@@ -223,6 +243,16 @@ void cblasXcopy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXcopy(const size_t n, // half overload: copies x into y via converted working buffers
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is converted too, so untouched elements keep their values
+ cblasXcopy(n, // resolves to another cblasXcopy overload through the *_bis argument types
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; x is a const input
+}
// Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
void cblasXaxpy(const size_t n,
@@ -263,6 +293,18 @@ void cblasXaxpy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXaxpy(const size_t n, // half overload: runs AXPY on converted copies, result lands in y
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ cblasXaxpy(n, // resolves to another cblasXaxpy overload through the converted argument types
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; x is a const input
+}
// Forwards the Netlib BLAS calls for SDOT/DDOT
void cblasXdot(const size_t n,
@@ -281,6 +323,19 @@ void cblasXdot(const size_t n,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXdot(const size_t n, // half overload: computes the dot product on converted copies
+ std::vector<half>& dot_buffer, const size_t dot_offset, // result buffer and the position used for the result
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ auto dot_buffer_bis = HalfToFloatBuffer(dot_buffer); // whole result buffer is converted, not just one element
+ cblasXdot(n, // resolves to another cblasXdot overload through the *_bis argument types
+ dot_buffer_bis, dot_offset,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(dot_buffer, dot_buffer_bis); // only the result buffer is converted back
+}
// Forwards the Netlib BLAS calls for CDOTU/ZDOTU
void cblasXdotu(const size_t n,
@@ -347,6 +402,16 @@ void cblasXnrm2(const size_t n,
nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
+void cblasXnrm2(const size_t n, // half overload: computes the norm on converted copies
+ std::vector<half>& nrm2_buffer, const size_t nrm2_offset, // result buffer and the position used for the result
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto nrm2_buffer_bis = HalfToFloatBuffer(nrm2_buffer);
+ cblasXnrm2(n, // resolves to another cblasXnrm2 overload through the *_bis argument types
+ nrm2_buffer_bis, nrm2_offset,
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(nrm2_buffer, nrm2_buffer_bis); // only the result buffer is converted back
+}
// Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM
void cblasXasum(const size_t n,
@@ -373,8 +438,18 @@ void cblasXasum(const size_t n,
asum_buffer[asum_offset].real(cblas_dzasum(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
+void cblasXasum(const size_t n, // half overload: computes the absolute sum on converted copies
+ std::vector<half>& asum_buffer, const size_t asum_offset, // result buffer and the position used for the result
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto asum_buffer_bis = HalfToFloatBuffer(asum_buffer);
+ cblasXasum(n, // resolves to another cblasXasum overload through the *_bis argument types
+ asum_buffer_bis, asum_offset,
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(asum_buffer, asum_buffer_bis); // only the result buffer is converted back
+}
-// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX
+// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
void cblasXamax(const size_t n,
std::vector<float>& imax_buffer, const size_t imax_offset,
const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) {
@@ -399,6 +474,16 @@ void cblasXamax(const size_t n,
((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXamax(const size_t n, // half overload: finds the max-magnitude index on converted copies
+ std::vector<half>& imax_buffer, const size_t imax_offset, // result buffer and the position used for the result
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto imax_buffer_bis = HalfToFloatBuffer(imax_buffer);
+ cblasXamax(n, // resolves to another cblasXamax overload through the *_bis argument types
+ imax_buffer_bis, imax_offset, // NOTE(review): the overload just above stores the index through an (int*) cast
+ x_buffer_bis, x_offset, x_inc); // NOTE(review): if the callee does the same, the write-back below may not preserve it; confirm
+ FloatToHalfBuffer(imax_buffer, imax_buffer_bis); // only the result buffer is converted back
+}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@@ -469,6 +554,25 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, // half overload of the GEMV wrapper
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is also an input because beta is applied to it
+ cblasXgemv(layout, a_transpose, // resolves to another cblasXgemv overload through the converted argument types
+ m, n,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; a and x are const inputs
+}
// Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
@@ -535,6 +639,25 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
+void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, // half overload of the GBMV wrapper
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is also an input because beta is applied to it
+ cblasXgbmv(layout, a_transpose, // resolves to another cblasXgbmv overload through the converted argument types
+ m, n, kl, ku,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; a and x are const inputs
+}
// Forwards the Netlib BLAS calls for CHEMV/ZHEMV
void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -675,6 +798,25 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SYMV wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is also an input because beta is applied to it
+ cblasXsymv(layout, triangle, // resolves to another cblasXsymv overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; a and x are const inputs
+}
// Forwards the Netlib BLAS calls for SSBMV/DSBMV
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -707,6 +849,25 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SBMV wrapper
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is also an input because beta is applied to it
+ cblasXsbmv(layout, triangle, // resolves to another cblasXsbmv overload through the converted argument types
+ n, k,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; a and x are const inputs
+}
// Forwards the Netlib BLAS calls for SSPMV/DSPMV
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -739,6 +900,25 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
+void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SPMV wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& ap_buffer, const size_t ap_offset, // matrix operand takes no leading dimension here
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer); // y is also an input because beta is applied to it
+ cblasXspmv(layout, triangle, // resolves to another cblasXspmv overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis); // only y is converted back; ap and x are const inputs
+}
// Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -777,6 +957,18 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, // half overload of the TRMV wrapper
+ const size_t n,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ cblasXtrmv(layout, triangle, a_transpose, diagonal, // resolves to another cblasXtrmv overload through the *_bis argument types
+ n,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis); // x is the in/out operand; a is a const input
+}
// Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -815,6 +1007,18 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, // half overload of the TBMV wrapper
+ const size_t n, const size_t k,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ cblasXtbmv(layout, triangle, a_transpose, diagonal, // resolves to another cblasXtbmv overload through the *_bis argument types
+ n, k,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis); // x is the in/out operand; a is a const input
+}
// Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -853,6 +1057,18 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
+void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, // half overload of the TPMV wrapper
+ const size_t n,
+ const std::vector<half>& ap_buffer, const size_t ap_offset, // matrix operand takes no leading dimension here
+ std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer);
+ cblasXtpmv(layout, triangle, a_transpose, diagonal, // resolves to another cblasXtpmv overload through the *_bis argument types
+ n,
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis); // x is the in/out operand; ap is a const input
+}
// Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -995,6 +1211,23 @@ void cblasXger(const CBLAS_ORDER layout,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXger(const CBLAS_ORDER layout, // half overload of the GER wrapper
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // a is the only non-const buffer, so it is the updated operand
+ cblasXger(layout, // resolves to another cblasXger overload through the converted argument types
+ m, n,
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis); // only a is converted back; x and y are const inputs
+}
// Forwards the Netlib BLAS calls for CGERU/ZGERU
void cblasXgeru(const CBLAS_ORDER layout,
@@ -1187,6 +1420,20 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SYR wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // a is the only non-const buffer, so it is the updated operand
+ cblasXsyr(layout, triangle, // resolves to another cblasXsyr overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ a_buffer_bis, a_offset, a_ld);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis); // only a is converted back; x is a const input
+}
// Forwards the Netlib BLAS calls for SSPR/DSPR
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1211,6 +1458,20 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
}
+void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SPR wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ std::vector<half>& ap_buffer, const size_t ap_offset) { // matrix operand takes no leading dimension here
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer); // ap is the only non-const buffer, so it is the updated operand
+ cblasXspr(layout, triangle, // resolves to another cblasXspr overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ ap_buffer_bis, ap_offset);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis); // only ap is converted back; x is a const input
+}
// Forwards the Netlib BLAS calls for SSYR2/DSYR2
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1239,6 +1500,23 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
+void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SYR2 wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // a is the only non-const buffer, so it is the updated operand
+ cblasXsyr2(layout, triangle, // resolves to another cblasXsyr2 overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis); // only a is converted back; x and y are const inputs
+}
// Forwards the Netlib BLAS calls for SSPR2/DSPR2
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@@ -1267,6 +1545,23 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&ap_buffer[ap_offset]);
}
+void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, // half overload of the SPR2 wrapper
+ const size_t n,
+ const half alpha,
+ const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ std::vector<half>& ap_buffer, const size_t ap_offset) { // matrix operand takes no leading dimension here
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer);
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer); // ap is the only non-const buffer, so it is the updated operand
+ cblasXspr2(layout, triangle, // resolves to another cblasXspr2 overload through the converted argument types
+ n,
+ HalfToFloat(alpha), // scalar is converted alongside the buffers
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ ap_buffer_bis, ap_offset);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis); // only ap is converted back; x and y are const inputs
+}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@@ -1337,6 +1632,25 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose, // half overload of the GEMM wrapper
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer); // c is also an input because beta is applied to it
+ cblasXgemm(layout, a_transpose, b_transpose, // resolves to another cblasXgemm overload through the converted argument types
+ m, n, k,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis); // only c is converted back; a and b are const inputs
+}
// Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@@ -1403,6 +1717,25 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, // half overload of the SYMM wrapper
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer); // c is also an input because beta is applied to it
+ cblasXsymm(layout, side, triangle, // resolves to another cblasXsymm overload through the converted argument types
+ m, n,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis); // only c is converted back; a and b are const inputs
+}
// Forwards the Netlib BLAS calls for CHEMM/ZHEMM
void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@@ -1497,6 +1830,22 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, // half overload of the SYRK wrapper
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer); // presumably a float copy; helper is defined outside this hunk
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer); // c is also an input because beta is applied to it
+ cblasXsyrk(layout, triangle, a_transpose, // resolves to another cblasXsyrk overload through the converted argument types
+ n, k,
+ HalfToFloat(alpha), // scalars are converted alongside the buffers
+ a_buffer_bis, a_offset, a_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis); // only c is converted back; a is a const input
+}
// Forwards the Netlib BLAS calls for CHERK/ZHERK
void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
@@ -1591,6 +1940,25 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
+void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer);
+ cblasXsyr2k(layout, triangle, ab_transpose,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis);
+}
// Forwards the Netlib BLAS calls for CHER2K/ZHER2K
void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
@@ -1673,6 +2041,20 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
+void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer);
+ cblasXtrmm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis);
+}
// Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@@ -1721,6 +2103,20 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
+void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer);
+ cblasXtrsm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis);
+}
// =================================================================================================
} // namespace clblast
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 23c55373..5115b3d9 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -34,104 +34,104 @@ clblasSide convertToCLBLAS(const Side v) { return (v == Side::kLeft) ? clblasLef
// Forwards the clBLAS calls for SROTG/DROTG
template <typename T>
-clblasStatus clblasXrotg(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg(Buffer<T>& sa_buffer, const size_t sa_offset,
+ Buffer<T>& sb_buffer, const size_t sb_offset,
+ Buffer<T>& sc_buffer, const size_t sc_offset,
+ Buffer<T>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
-clblasStatus clblasXrotg<float>(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg<float>(Buffer<float>& sa_buffer, const size_t sa_offset,
+ Buffer<float>& sb_buffer, const size_t sb_offset,
+ Buffer<float>& sc_buffer, const size_t sc_offset,
+ Buffer<float>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasSrotg(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
+ return clblasSrotg(sa_buffer(), sa_offset,
+ sb_buffer(), sb_offset,
+ sc_buffer(), sc_offset,
+ ss_buffer(), ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
-clblasStatus clblasXrotg<double>(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg<double>(Buffer<double>& sa_buffer, const size_t sa_offset,
+ Buffer<double>& sb_buffer, const size_t sb_offset,
+ Buffer<double>& sc_buffer, const size_t sc_offset,
+ Buffer<double>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasDrotg(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
+ return clblasDrotg(sa_buffer(), sa_offset,
+ sb_buffer(), sb_offset,
+ sc_buffer(), sc_offset,
+ ss_buffer(), ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SROTMG/DROTMG
template <typename T>
-clblasStatus clblasXrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg(Buffer<T>& sd1_buffer, const size_t sd1_offset,
+ Buffer<T>& sd2_buffer, const size_t sd2_offset,
+ Buffer<T>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<T>& sy1_buffer, const size_t sy1_offset,
+ Buffer<T>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
-clblasStatus clblasXrotmg<float>(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg<float>(Buffer<float>& sd1_buffer, const size_t sd1_offset,
+ Buffer<float>& sd2_buffer, const size_t sd2_offset,
+ Buffer<float>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<float>& sy1_buffer, const size_t sy1_offset,
+ Buffer<float>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasSrotmg(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
+ return clblasSrotmg(sd1_buffer(), sd1_offset,
+ sd2_buffer(), sd2_offset,
+ sx1_buffer(), sx1_offset,
+ sy1_buffer(), sy1_offset,
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
-clblasStatus clblasXrotmg<double>(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg<double>(Buffer<double>& sd1_buffer, const size_t sd1_offset,
+ Buffer<double>& sd2_buffer, const size_t sd2_offset,
+ Buffer<double>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<double>& sy1_buffer, const size_t sy1_offset,
+ Buffer<double>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasDrotmg(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
+ return clblasDrotmg(sd1_buffer(), sd1_offset,
+ sd2_buffer(), sd2_offset,
+ sx1_buffer(), sx1_offset,
+ sy1_buffer(), sy1_offset,
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SROT/DROT
clblasStatus clblasXrot(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
const float cos,
const float sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSrot(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
cos,
sin,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXrot(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
const double cos,
const double sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDrot(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
cos,
sin,
num_queues, queues, num_wait_events, wait_events, events);
@@ -140,316 +140,394 @@ clblasStatus clblasXrot(const size_t n,
// Forwards the clBLAS calls for SROTM/DROTM
template <typename T>
clblasStatus clblasXrotm(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXrotm<float>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSrotm(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- sparam_buffer, sparam_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXrotm<double>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDrotm(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- sparam_buffer, sparam_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
clblasStatus clblasXswap(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXswap<float>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<double>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<float2>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<double2>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXswap<half>(const size_t n,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXswap(n,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
clblasStatus clblasXscal(const size_t n,
const float alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSscal(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const double alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDscal(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const float2 alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCscal(n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const double2 alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZscal(n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXscal(const size_t n,
+ const half alpha,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXscal(n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T>
clblasStatus clblasXcopy(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXcopy<float>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasScopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<double>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<float2>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<double2>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXcopy<half>(const size_t n,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXcopy(n,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
clblasStatus clblasXaxpy(const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSaxpy(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDaxpy(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCaxpy(n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZaxpy(n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXaxpy(const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXaxpy(n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SDOT/DDOT
template <typename T>
clblasStatus clblasXdot(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdot<float>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& dot_buffer, const size_t dot_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, n);
return clblasSdot(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdot<double>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& dot_buffer, const size_t dot_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, n);
return clblasDdot(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXdot<half>(const size_t n,
+ Buffer<half>& dot_buffer, const size_t dot_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto dot_buffer_bis = HalfToFloatBuffer(dot_buffer, queues[0]);
+ auto status = clblasXdot(n,
+ dot_buffer_bis, dot_offset,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(dot_buffer, dot_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CDOTU/ZDOTU
template <typename T>
clblasStatus clblasXdotu(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdotu<float2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& dot_buffer, const size_t dot_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotu(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdotu<double2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& dot_buffer, const size_t dot_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotu(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -457,42 +535,42 @@ clblasStatus clblasXdotu<double2>(const size_t n,
// Forwards the clBLAS calls for CDOTC/ZDOTC
template <typename T>
clblasStatus clblasXdotc(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdotc<float2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& dot_buffer, const size_t dot_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotc(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdotc<double2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& dot_buffer, const size_t dot_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotc(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -500,206 +578,251 @@ clblasStatus clblasXdotc<double2>(const size_t n,
// Forwards the clBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2
template <typename T>
clblasStatus clblasXnrm2(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXnrm2<float>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, 2*n);
return clblasSnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<double>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, 2*n);
return clblasDnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<float2>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, 2*n);
return clblasScnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<double2>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, 2*n);
return clblasDznrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXnrm2<half>(const size_t n,
+ Buffer<half>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto nrm2_buffer_bis = HalfToFloatBuffer(nrm2_buffer, queues[0]);
+ auto status = clblasXnrm2(n,
+ nrm2_buffer_bis, nrm2_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(nrm2_buffer, nrm2_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
template <typename T>
clblasStatus clblasXasum(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& asum_buffer, const size_t asum_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXasum<float>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& asum_buffer, const size_t asum_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, n);
return clblasSasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<double>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& asum_buffer, const size_t asum_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, n);
return clblasDasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<float2>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& asum_buffer, const size_t asum_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasScasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<double2>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& asum_buffer, const size_t asum_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasDzasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXasum<half>(const size_t n,
+ Buffer<half>& asum_buffer, const size_t asum_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto asum_buffer_bis = HalfToFloatBuffer(asum_buffer, queues[0]);
+ auto status = clblasXasum(n,
+ asum_buffer_bis, asum_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(asum_buffer, asum_buffer_bis, queues[0]);
+ return status;
+}
-// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX
+// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
template <typename T>
clblasStatus clblasXamax(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& imax_buffer, const size_t imax_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXamax<float>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& imax_buffer, const size_t imax_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, 2*n);
return clblasiSamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<double>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& imax_buffer, const size_t imax_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, 2*n);
return clblasiDamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<float2>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& imax_buffer, const size_t imax_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, 2*n);
return clblasiCamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<double2>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& imax_buffer, const size_t imax_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, 2*n);
return clblasiZamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXamax<half>(const size_t n,
+ Buffer<half>& imax_buffer, const size_t imax_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto imax_buffer_bis = HalfToFloatBuffer(imax_buffer, queues[0]);
+ auto status = clblasXamax(n,
+ imax_buffer_bis, imax_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(imax_buffer, imax_buffer_bis, queues[0]);
+ return status;
+}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@@ -709,185 +832,231 @@ clblasStatus clblasXamax<double2>(const size_t n,
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgemv(layout, a_transpose,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgemv(layout, a_transpose,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgemv(layout, a_transpose,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgemv(layout, a_transpose,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
+ const size_t m, const size_t n,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXgemv(layout, a_transpose,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgbmv(layout, a_transpose,
m, n, kl, ku,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgbmv(layout, a_transpose,
m, n, kl, ku,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgbmv(layout, a_transpose,
m, n, kl, ku,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgbmv(layout, a_transpose,
m, n, kl, ku,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXgbmv(layout, a_transpose,
+ m, n, kl, ku,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CHEMV/ZHEMV
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChemv(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhemv(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -895,37 +1064,37 @@ clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChbmv(layout, triangle,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhbmv(layout, triangle,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -933,37 +1102,37 @@ clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpmv(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpmv(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -971,129 +1140,198 @@ clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsymv(layout, triangle,
n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsymv(layout, triangle,
n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXsymv(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSBMV/DSBMV
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsbmv(layout, triangle,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsbmv(layout, triangle,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n, const size_t k,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXsbmv(layout, triangle,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSPMV/DSPMV
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspmv(layout, triangle,
n,
alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspmv(layout, triangle,
n,
alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& ap_buffer, const size_t ap_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const half beta,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXspmv(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
template <typename T>
clblasStatus clblasXtrmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1101,16 +1339,16 @@ clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1118,16 +1356,16 @@ clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1135,16 +1373,16 @@ clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1152,25 +1390,42 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtrmv(layout, triangle, a_transpose, diagonal,
+ n,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
template <typename T>
clblasStatus clblasXtbmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1178,16 +1433,16 @@ clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1195,16 +1450,16 @@ clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1212,16 +1467,16 @@ clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1229,25 +1484,42 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n, const size_t k,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtbmv(layout, triangle, a_transpose, diagonal,
+ n, k,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
template <typename T>
clblasStatus clblasXtpmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& ap_buffer, const size_t ap_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1255,16 +1527,16 @@ clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1272,16 +1544,16 @@ clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1289,16 +1561,16 @@ clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1306,70 +1578,87 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
+template <>
+clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t n,
+ const Buffer<half>& ap_buffer, const size_t ap_offset,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtpmv(layout, triangle, a_transpose, diagonal,
+ n,
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
template <typename T>
clblasStatus clblasXtrsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtrsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1377,60 +1666,60 @@ clblasStatus clblasXtrsv<double2>(const clblasOrder layout, const clblasUplo tri
template <typename T>
clblasStatus clblasXtbsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtbsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1438,60 +1727,60 @@ clblasStatus clblasXtbsv<double2>(const clblasOrder layout, const clblasUplo tri
template <typename T>
clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& ap_buffer, const size_t ap_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtpsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1499,67 +1788,88 @@ clblasStatus clblasXtpsv<double2>(const clblasOrder layout, const clblasUplo tri
clblasStatus clblasXger(const clblasOrder layout,
const size_t m, const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSger(layout,
m, n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXger(const clblasOrder layout,
const size_t m, const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDger(layout,
m, n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXger(const clblasOrder layout,
+ const size_t m, const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXger(layout,
+ m, n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CGERU/ZGERU
clblasStatus clblasXgeru(const clblasOrder layout,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgeru(layout,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgeru(const clblasOrder layout,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgeru(layout,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1567,33 +1877,33 @@ clblasStatus clblasXgeru(const clblasOrder layout,
clblasStatus clblasXgerc(const clblasOrder layout,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgerc(layout,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgerc(const clblasOrder layout,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgerc(layout,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1601,29 +1911,29 @@ clblasStatus clblasXgerc(const clblasOrder layout,
clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1631,29 +1941,29 @@ clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1661,33 +1971,33 @@ clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher2(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher2(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1695,33 +2005,33 @@ clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpr2(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpr2(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1729,129 +2039,207 @@ clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXsyr(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSPR/DSPR
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& ap_buffer, const size_t ap_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto status = clblasXspr(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ ap_buffer_bis, ap_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSYR2/DSYR2
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXsyr2(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSPR2/DSPR2
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
+ const size_t n,
+ const half alpha,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& ap_buffer, const size_t ap_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto status = clblasXspr2(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ ap_buffer_bis, ap_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]);
+ return status;
+}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@@ -1861,185 +2249,231 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgemm(layout, a_transpose, b_transpose,
m, n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgemm(layout, a_transpose, b_transpose,
m, n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgemm(layout, a_transpose, b_transpose,
m, n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgemm(layout, a_transpose, b_transpose,
m, n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXgemm(layout, a_transpose, b_transpose,
+ m, n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsymm(layout, side, triangle,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsymm(layout, side, triangle,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsymm(layout, side, triangle,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsymm(layout, side, triangle,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
+ const size_t m, const size_t n,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsymm(layout, side, triangle,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CHEMM/ZHEMM
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChemm(layout, side, triangle,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhemm(layout, side, triangle,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2047,99 +2481,119 @@ clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyrk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyrk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsyrk(layout, triangle, a_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsyrk(layout, triangle, a_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const half beta,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsyrk(layout, triangle, a_transpose,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CHERK/ZHERK
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCherk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZherk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2147,111 +2601,134 @@ clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, co
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr2k(layout, triangle, ab_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr2k(layout, triangle, ab_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsyr2k(layout, triangle, ab_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsyr2k(layout, triangle, ab_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
+ const size_t n, const size_t k,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ const half beta,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsyr2k(layout, triangle, ab_transpose,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for CHER2K/ZHER2K
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher2k(layout, triangle, ab_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher2k(layout, triangle, ab_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2259,117 +2736,153 @@ clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, c
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto status = clblasXtrmm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis, queues[0]);
+ return status;
+}
// Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
+clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
+ const size_t m, const size_t n,
+ const half alpha,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto status = clblasXtrsm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis, queues[0]);
+ return status;
+}
// =================================================================================================
} // namespace clblast