summary refs log tree commit diff
path: root/test
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-05-26 23:36:19 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-05-26 23:36:19 +0200
commit 03182f9d07533f795a498936391da744d982e8e2 (patch)
tree 3a73046809927abd1000fe3309f37787d1791976 /test
parent b487d4dd44179293c9e08ddf2ce3ed902fa749c8 (diff)
Added half-precision tests for the clBLAS reference through conversion to single-precision
Diffstat (limited to 'test')
-rw-r--r-- test/correctness/testblas.cc 1
-rw-r--r-- test/routines/level1/xamax.h 4
-rw-r--r-- test/routines/level1/xasum.h 4
-rw-r--r-- test/routines/level1/xaxpy.h 4
-rw-r--r-- test/routines/level1/xcopy.h 4
-rw-r--r-- test/routines/level1/xdot.h 6
-rw-r--r-- test/routines/level1/xdotc.h 6
-rw-r--r-- test/routines/level1/xdotu.h 6
-rw-r--r-- test/routines/level1/xnrm2.h 4
-rw-r--r-- test/routines/level1/xscal.h 2
-rw-r--r-- test/routines/level1/xswap.h 4
-rw-r--r-- test/routines/level2/xgbmv.h 6
-rw-r--r-- test/routines/level2/xgemv.h 6
-rw-r--r-- test/routines/level2/xger.h 6
-rw-r--r-- test/routines/level2/xgerc.h 6
-rw-r--r-- test/routines/level2/xgeru.h 6
-rw-r--r-- test/routines/level2/xhbmv.h 6
-rw-r--r-- test/routines/level2/xhemv.h 6
-rw-r--r-- test/routines/level2/xher.h 4
-rw-r--r-- test/routines/level2/xher2.h 6
-rw-r--r-- test/routines/level2/xhpmv.h 6
-rw-r--r-- test/routines/level2/xhpr.h 4
-rw-r--r-- test/routines/level2/xhpr2.h 6
-rw-r--r-- test/routines/level2/xsbmv.h 6
-rw-r--r-- test/routines/level2/xspmv.h 6
-rw-r--r-- test/routines/level2/xspr.h 4
-rw-r--r-- test/routines/level2/xspr2.h 6
-rw-r--r-- test/routines/level2/xsymv.h 6
-rw-r--r-- test/routines/level2/xsyr.h 4
-rw-r--r-- test/routines/level2/xsyr2.h 6
-rw-r--r-- test/routines/level2/xtbmv.h 4
-rw-r--r-- test/routines/level2/xtpmv.h 4
-rw-r--r-- test/routines/level2/xtrmv.h 4
-rw-r--r-- test/routines/level3/xgemm.h 6
-rw-r--r-- test/routines/level3/xhemm.h 6
-rw-r--r-- test/routines/level3/xher2k.h 6
-rw-r--r-- test/routines/level3/xherk.h 4
-rw-r--r-- test/routines/level3/xsymm.h 6
-rw-r--r-- test/routines/level3/xsyr2k.h 6
-rw-r--r-- test/routines/level3/xsyrk.h 4
-rw-r--r-- test/routines/level3/xtrmm.h 4
-rw-r--r-- test/wrapper_cblas.h 10
-rw-r--r-- test/wrapper_clblas.h 1928
43 files changed, 1196 insertions, 947 deletions
diff --git a/test/correctness/testblas.cc b/test/correctness/testblas.cc
index cbf8b0a0..50871402 100644
--- a/test/correctness/testblas.cc
+++ b/test/correctness/testblas.cc
@@ -170,6 +170,7 @@ template <typename T, typename U>
void TestBlas<T,U>::TestInvalid(std::vector<Arguments<U>> &test_vector, const std::string &name) {
if (!PrecisionSupported<T>(device_)) { return; }
if (!compare_clblas_) { return; }
+ if (std::is_same<T, half>::value) { return; }
TestStart("invalid buffer sizes", name);
// Iterates over all the to-be-tested combinations of arguments
diff --git a/test/routines/level1/xamax.h b/test/routines/level1/xamax.h
index 7b404dc3..12b031bc 100644
--- a/test/routines/level1/xamax.h
+++ b/test/routines/level1/xamax.h
@@ -86,8 +86,8 @@ class TestXamax {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXamax<T>(args.n,
- buffers.scalar(), args.imax_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.imax_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xasum.h b/test/routines/level1/xasum.h
index 6eae3c83..eb83817b 100644
--- a/test/routines/level1/xasum.h
+++ b/test/routines/level1/xasum.h
@@ -86,8 +86,8 @@ class TestXasum {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXasum<T>(args.n,
- buffers.scalar(), args.asum_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.asum_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xaxpy.h b/test/routines/level1/xaxpy.h
index 8f72f570..c241da91 100644
--- a/test/routines/level1/xaxpy.h
+++ b/test/routines/level1/xaxpy.h
@@ -87,8 +87,8 @@ class TestXaxpy {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xcopy.h b/test/routines/level1/xcopy.h
index 0527ca6a..a1ff06ce 100644
--- a/test/routines/level1/xcopy.h
+++ b/test/routines/level1/xcopy.h
@@ -86,8 +86,8 @@ class TestXcopy {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXcopy<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdot.h b/test/routines/level1/xdot.h
index d1c34c0f..0bbc93d5 100644
--- a/test/routines/level1/xdot.h
+++ b/test/routines/level1/xdot.h
@@ -91,9 +91,9 @@ class TestXdot {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdot<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdotc.h b/test/routines/level1/xdotc.h
index a2742cb0..e1cc1854 100644
--- a/test/routines/level1/xdotc.h
+++ b/test/routines/level1/xdotc.h
@@ -91,9 +91,9 @@ class TestXdotc {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotc<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xdotu.h b/test/routines/level1/xdotu.h
index 06ce979e..558257cc 100644
--- a/test/routines/level1/xdotu.h
+++ b/test/routines/level1/xdotu.h
@@ -91,9 +91,9 @@ class TestXdotu {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotu<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.scalar, args.dot_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xnrm2.h b/test/routines/level1/xnrm2.h
index d8a0de4e..19074ca2 100644
--- a/test/routines/level1/xnrm2.h
+++ b/test/routines/level1/xnrm2.h
@@ -86,8 +86,8 @@ class TestXnrm2 {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXnrm2<T>(args.n,
- buffers.scalar(), args.nrm2_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.scalar, args.nrm2_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xscal.h b/test/routines/level1/xscal.h
index 35855dbd..84d14ac7 100644
--- a/test/routines/level1/xscal.h
+++ b/test/routines/level1/xscal.h
@@ -82,7 +82,7 @@ class TestXscal {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXscal(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level1/xswap.h b/test/routines/level1/xswap.h
index ae69d3be..e870b602 100644
--- a/test/routines/level1/xswap.h
+++ b/test/routines/level1/xswap.h
@@ -86,8 +86,8 @@ class TestXswap {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXswap<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgbmv.h b/test/routines/level2/xgbmv.h
index c88cdf2a..c777ff73 100644
--- a/test/routines/level2/xgbmv.h
+++ b/test/routines/level2/xgbmv.h
@@ -102,9 +102,9 @@ class TestXgbmv {
auto status = clblasXgbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.kl, args.ku, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h
index cf63d55f..f8a7e1d0 100644
--- a/test/routines/level2/xgemv.h
+++ b/test/routines/level2/xgemv.h
@@ -102,9 +102,9 @@ class TestXgemv {
auto status = clblasXgemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xger.h b/test/routines/level2/xger.h
index ae142e2e..e0d1fe49 100644
--- a/test/routines/level2/xger.h
+++ b/test/routines/level2/xger.h
@@ -97,9 +97,9 @@ class TestXger {
auto event = cl_event{};
auto status = clblasXger(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgerc.h b/test/routines/level2/xgerc.h
index b236aef6..7449146b 100644
--- a/test/routines/level2/xgerc.h
+++ b/test/routines/level2/xgerc.h
@@ -97,9 +97,9 @@ class TestXgerc {
auto event = cl_event{};
auto status = clblasXgerc(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xgeru.h b/test/routines/level2/xgeru.h
index 3d3fa439..07837657 100644
--- a/test/routines/level2/xgeru.h
+++ b/test/routines/level2/xgeru.h
@@ -97,9 +97,9 @@ class TestXgeru {
auto event = cl_event{};
auto status = clblasXgeru(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhbmv.h b/test/routines/level2/xhbmv.h
index 4098639a..73194975 100644
--- a/test/routines/level2/xhbmv.h
+++ b/test/routines/level2/xhbmv.h
@@ -96,9 +96,9 @@ class TestXhbmv {
auto status = clblasXhbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhemv.h b/test/routines/level2/xhemv.h
index 5652872d..aabbf14a 100644
--- a/test/routines/level2/xhemv.h
+++ b/test/routines/level2/xhemv.h
@@ -96,9 +96,9 @@ class TestXhemv {
auto status = clblasXhemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xher.h b/test/routines/level2/xher.h
index 3bbf0887..1294832c 100644
--- a/test/routines/level2/xher.h
+++ b/test/routines/level2/xher.h
@@ -91,8 +91,8 @@ class TestXher {
auto status = clblasXher(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xher2.h b/test/routines/level2/xher2.h
index dc7fbe73..5e90174d 100644
--- a/test/routines/level2/xher2.h
+++ b/test/routines/level2/xher2.h
@@ -96,9 +96,9 @@ class TestXher2 {
auto status = clblasXher2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpmv.h b/test/routines/level2/xhpmv.h
index df5a90ee..8face6b6 100644
--- a/test/routines/level2/xhpmv.h
+++ b/test/routines/level2/xhpmv.h
@@ -96,9 +96,9 @@ class TestXhpmv {
auto status = clblasXhpmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpr.h b/test/routines/level2/xhpr.h
index 0db11db0..63cab31f 100644
--- a/test/routines/level2/xhpr.h
+++ b/test/routines/level2/xhpr.h
@@ -91,8 +91,8 @@ class TestXhpr {
auto status = clblasXhpr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xhpr2.h b/test/routines/level2/xhpr2.h
index e1e5b4c5..64d205a0 100644
--- a/test/routines/level2/xhpr2.h
+++ b/test/routines/level2/xhpr2.h
@@ -96,9 +96,9 @@ class TestXhpr2 {
auto status = clblasXhpr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsbmv.h b/test/routines/level2/xsbmv.h
index fce88f4c..3f1446c8 100644
--- a/test/routines/level2/xsbmv.h
+++ b/test/routines/level2/xsbmv.h
@@ -96,9 +96,9 @@ class TestXsbmv {
auto status = clblasXsbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspmv.h b/test/routines/level2/xspmv.h
index 2fdba77a..2add3cdd 100644
--- a/test/routines/level2/xspmv.h
+++ b/test/routines/level2/xspmv.h
@@ -96,9 +96,9 @@ class TestXspmv {
auto status = clblasXspmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspr.h b/test/routines/level2/xspr.h
index dcacc5de..ad21bdf6 100644
--- a/test/routines/level2/xspr.h
+++ b/test/routines/level2/xspr.h
@@ -91,8 +91,8 @@ class TestXspr {
auto status = clblasXspr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xspr2.h b/test/routines/level2/xspr2.h
index 69fda2fb..c55e8181 100644
--- a/test/routines/level2/xspr2.h
+++ b/test/routines/level2/xspr2.h
@@ -96,9 +96,9 @@ class TestXspr2 {
auto status = clblasXspr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsymv.h b/test/routines/level2/xsymv.h
index 16f94d6f..b6583a24 100644
--- a/test/routines/level2/xsymv.h
+++ b/test/routines/level2/xsymv.h
@@ -96,9 +96,9 @@ class TestXsymv {
auto status = clblasXsymv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsyr.h b/test/routines/level2/xsyr.h
index a66dd271..f3929588 100644
--- a/test/routines/level2/xsyr.h
+++ b/test/routines/level2/xsyr.h
@@ -91,8 +91,8 @@ class TestXsyr {
auto status = clblasXsyr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xsyr2.h b/test/routines/level2/xsyr2.h
index a36815e5..8cdb6a14 100644
--- a/test/routines/level2/xsyr2.h
+++ b/test/routines/level2/xsyr2.h
@@ -96,9 +96,9 @@ class TestXsyr2 {
auto status = clblasXsyr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
+ buffers.y_vec, args.y_offset, args.y_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtbmv.h b/test/routines/level2/xtbmv.h
index 1425b60b..9c4131ec 100644
--- a/test/routines/level2/xtbmv.h
+++ b/test/routines/level2/xtbmv.h
@@ -92,8 +92,8 @@ class TestXtbmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n, args.kl,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtpmv.h b/test/routines/level2/xtpmv.h
index a834b437..58249227 100644
--- a/test/routines/level2/xtpmv.h
+++ b/test/routines/level2/xtpmv.h
@@ -92,8 +92,8 @@ class TestXtpmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.ap_mat, args.ap_offset,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level2/xtrmv.h b/test/routines/level2/xtrmv.h
index cd502d5d..635a1319 100644
--- a/test/routines/level2/xtrmv.h
+++ b/test/routines/level2/xtrmv.h
@@ -92,8 +92,8 @@ class TestXtrmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xgemm.h b/test/routines/level3/xgemm.h
index cd5c2acd..842dae93 100644
--- a/test/routines/level3/xgemm.h
+++ b/test/routines/level3/xgemm.h
@@ -105,9 +105,9 @@ class TestXgemm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.b_transpose),
args.m, args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xhemm.h b/test/routines/level3/xhemm.h
index edc71024..106b99ff 100644
--- a/test/routines/level3/xhemm.h
+++ b/test/routines/level3/xhemm.h
@@ -105,9 +105,9 @@ class TestXhemm {
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xher2k.h b/test/routines/level3/xher2k.h
index a78e1293..e2f4448f 100644
--- a/test/routines/level3/xher2k.h
+++ b/test/routines/level3/xher2k.h
@@ -105,9 +105,9 @@ class TestXher2k {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, alpha2,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xherk.h b/test/routines/level3/xherk.h
index 245293d6..43d7cfcd 100644
--- a/test/routines/level3/xherk.h
+++ b/test/routines/level3/xherk.h
@@ -95,8 +95,8 @@ class TestXherk {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsymm.h b/test/routines/level3/xsymm.h
index e638b735..c32b4cf7 100644
--- a/test/routines/level3/xsymm.h
+++ b/test/routines/level3/xsymm.h
@@ -105,9 +105,9 @@ class TestXsymm {
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsyr2k.h b/test/routines/level3/xsyr2k.h
index abac20f4..57c3c203 100644
--- a/test/routines/level3/xsyr2k.h
+++ b/test/routines/level3/xsyr2k.h
@@ -103,9 +103,9 @@ class TestXsyr2k {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xsyrk.h b/test/routines/level3/xsyrk.h
index 8a5fcb5f..6c3a3786 100644
--- a/test/routines/level3/xsyrk.h
+++ b/test/routines/level3/xsyrk.h
@@ -95,8 +95,8 @@ class TestXsyrk {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers.c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/routines/level3/xtrmm.h b/test/routines/level3/xtrmm.h
index 7c9c21bc..3eb63030 100644
--- a/test/routines/level3/xtrmm.h
+++ b/test/routines/level3/xtrmm.h
@@ -97,8 +97,8 @@ class TestXtrmm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld,
+ buffers.a_mat, args.a_offset, args.a_ld,
+ buffers.b_mat, args.b_offset, args.b_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h
index 06ce6269..bf59aa94 100644
--- a/test/wrapper_cblas.h
+++ b/test/wrapper_cblas.h
@@ -31,16 +31,6 @@ CBLAS_UPLO convertToCBLAS(const Triangle v) { return (v == Triangle::kUpper) ? C
CBLAS_DIAG convertToCBLAS(const Diagonal v) { return (v == Diagonal::kUnit) ? CblasUnit : CblasNonUnit; }
CBLAS_SIDE convertToCBLAS(const Side v) { return (v == Side::kLeft) ? CblasLeft : CblasRight; }
-// Conversions from and to half-precision
-std::vector<float> HalfToFloatBuffer(const std::vector<half>& source) {
- auto result = std::vector<float>(source.size());
- for (auto i = size_t(0); i < source.size(); ++i) { result[i] = HalfToFloat(source[i]); }
- return result;
-}
-void FloatToHalfBuffer(std::vector<half>& result, const std::vector<float>& source) {
- for (auto i = size_t(0); i < source.size(); ++i) { result[i] = FloatToHalf(source[i]); }
-}
-
// OpenBLAS is not fully Netlib CBLAS compatible
#ifdef OPENBLAS_VERSION
using return_pointer_float = openblas_complex_float*;
diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h
index 6e44d780..5115b3d9 100644
--- a/test/wrapper_clblas.h
+++ b/test/wrapper_clblas.h
@@ -34,104 +34,104 @@ clblasSide convertToCLBLAS(const Side v) { return (v == Side::kLeft) ? clblasLef
// Forwards the clBLAS calls for SROTG/DROTG
template <typename T>
-clblasStatus clblasXrotg(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg(Buffer<T>& sa_buffer, const size_t sa_offset,
+ Buffer<T>& sb_buffer, const size_t sb_offset,
+ Buffer<T>& sc_buffer, const size_t sc_offset,
+ Buffer<T>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
-clblasStatus clblasXrotg<float>(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg<float>(Buffer<float>& sa_buffer, const size_t sa_offset,
+ Buffer<float>& sb_buffer, const size_t sb_offset,
+ Buffer<float>& sc_buffer, const size_t sc_offset,
+ Buffer<float>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasSrotg(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
+ return clblasSrotg(sa_buffer(), sa_offset,
+ sb_buffer(), sb_offset,
+ sc_buffer(), sc_offset,
+ ss_buffer(), ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
-clblasStatus clblasXrotg<double>(cl_mem sa_buffer, const size_t sa_offset,
- cl_mem sb_buffer, const size_t sb_offset,
- cl_mem sc_buffer, const size_t sc_offset,
- cl_mem ss_buffer, const size_t ss_offset,
+clblasStatus clblasXrotg<double>(Buffer<double>& sa_buffer, const size_t sa_offset,
+ Buffer<double>& sb_buffer, const size_t sb_offset,
+ Buffer<double>& sc_buffer, const size_t sc_offset,
+ Buffer<double>& ss_buffer, const size_t ss_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasDrotg(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
+ return clblasDrotg(sa_buffer(), sa_offset,
+ sb_buffer(), sb_offset,
+ sc_buffer(), sc_offset,
+ ss_buffer(), ss_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SROTMG/DROTMG
template <typename T>
-clblasStatus clblasXrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg(Buffer<T>& sd1_buffer, const size_t sd1_offset,
+ Buffer<T>& sd2_buffer, const size_t sd2_offset,
+ Buffer<T>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<T>& sy1_buffer, const size_t sy1_offset,
+ Buffer<T>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
-clblasStatus clblasXrotmg<float>(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg<float>(Buffer<float>& sd1_buffer, const size_t sd1_offset,
+ Buffer<float>& sd2_buffer, const size_t sd2_offset,
+ Buffer<float>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<float>& sy1_buffer, const size_t sy1_offset,
+ Buffer<float>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasSrotmg(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
+ return clblasSrotmg(sd1_buffer(), sd1_offset,
+ sd2_buffer(), sd2_offset,
+ sx1_buffer(), sx1_offset,
+ sy1_buffer(), sy1_offset,
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
-clblasStatus clblasXrotmg<double>(cl_mem sd1_buffer, const size_t sd1_offset,
- cl_mem sd2_buffer, const size_t sd2_offset,
- cl_mem sx1_buffer, const size_t sx1_offset,
- const cl_mem sy1_buffer, const size_t sy1_offset,
- cl_mem sparam_buffer, const size_t sparam_offset,
+clblasStatus clblasXrotmg<double>(Buffer<double>& sd1_buffer, const size_t sd1_offset,
+ Buffer<double>& sd2_buffer, const size_t sd2_offset,
+ Buffer<double>& sx1_buffer, const size_t sx1_offset,
+ const Buffer<double>& sy1_buffer, const size_t sy1_offset,
+ Buffer<double>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasDrotmg(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
+ return clblasDrotmg(sd1_buffer(), sd1_offset,
+ sd2_buffer(), sd2_offset,
+ sx1_buffer(), sx1_offset,
+ sy1_buffer(), sy1_offset,
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SROT/DROT
clblasStatus clblasXrot(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
const float cos,
const float sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSrot(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
cos,
sin,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXrot(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
const double cos,
const double sin,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDrot(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
cos,
sin,
num_queues, queues, num_wait_events, wait_events, events);
@@ -140,356 +140,394 @@ clblasStatus clblasXrot(const size_t n,
// Forwards the clBLAS calls for SROTM/DROTM
template <typename T>
clblasStatus clblasXrotm(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXrotm<float>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSrotm(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- sparam_buffer, sparam_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXrotm<double>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem sparam_buffer, const size_t sparam_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& sparam_buffer, const size_t sparam_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDrotm(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- sparam_buffer, sparam_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ sparam_buffer(), sparam_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Forwards the clBLAS calls for SSWAP/DSWAP/CSWAP/ZSWAP
template <typename T>
clblasStatus clblasXswap(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXswap<float>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<double>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<float2>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<double2>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZswap(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXswap<half>(const size_t n,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXswap(n,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
clblasStatus clblasXscal(const size_t n,
const float alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSscal(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const double alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDscal(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const float2 alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCscal(n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const double2 alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZscal(n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXscal(const size_t n,
const half alpha,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXscal(n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T>
clblasStatus clblasXcopy(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXcopy<float>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasScopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<double>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<float2>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<double2>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZcopy(n,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXcopy<half>(const size_t n,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXcopy(n,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
clblasStatus clblasXaxpy(const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSaxpy(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDaxpy(n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCaxpy(n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZaxpy(n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXaxpy(const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXaxpy(n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SDOT/DDOT
template <typename T>
clblasStatus clblasXdot(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdot<float>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& dot_buffer, const size_t dot_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, n);
return clblasSdot(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdot<double>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& dot_buffer, const size_t dot_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, n);
return clblasDdot(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdot<half>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& dot_buffer, const size_t dot_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto dot_buffer_bis = HalfToFloatBuffer(dot_buffer, queues[0]);
+ auto status = clblasXdot(n,
+ dot_buffer_bis, dot_offset,
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(dot_buffer, dot_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CDOTU/ZDOTU
template <typename T>
clblasStatus clblasXdotu(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdotu<float2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& dot_buffer, const size_t dot_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotu(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdotu<double2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& dot_buffer, const size_t dot_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotu(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -497,42 +535,42 @@ clblasStatus clblasXdotu<double2>(const size_t n,
// Forwards the clBLAS calls for CDOTC/ZDOTC
template <typename T>
clblasStatus clblasXdotc(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<T>& dot_buffer, const size_t dot_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXdotc<float2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& dot_buffer, const size_t dot_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCdotc(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXdotc<double2>(const size_t n,
- cl_mem dot_buffer, const size_t dot_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& dot_buffer, const size_t dot_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZdotc(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
+ dot_buffer(), dot_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -540,229 +578,250 @@ clblasStatus clblasXdotc<double2>(const size_t n,
// Forwards the clBLAS calls for SNRM2/DNRM2/ScNRM2/DzNRM2
template <typename T>
clblasStatus clblasXnrm2(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXnrm2<float>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, 2*n);
return clblasSnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<double>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, 2*n);
return clblasDnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<float2>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, 2*n);
return clblasScnrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<double2>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, 2*n);
return clblasDznrm2(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ nrm2_buffer(), nrm2_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXnrm2<half>(const size_t n,
- cl_mem nrm2_buffer, const size_t nrm2_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto nrm2_buffer_bis = HalfToFloatBuffer(nrm2_buffer, queues[0]);
+ auto status = clblasXnrm2(n,
+ nrm2_buffer_bis, nrm2_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(nrm2_buffer, nrm2_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
template <typename T>
clblasStatus clblasXasum(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& asum_buffer, const size_t asum_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXasum<float>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& asum_buffer, const size_t asum_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, n);
return clblasSasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<double>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& asum_buffer, const size_t asum_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, n);
return clblasDasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<float2>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& asum_buffer, const size_t asum_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, n);
return clblasScasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<double2>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& asum_buffer, const size_t asum_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, n);
return clblasDzasum(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ asum_buffer(), asum_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXasum<half>(const size_t n,
- cl_mem asum_buffer, const size_t asum_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& asum_buffer, const size_t asum_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto asum_buffer_bis = HalfToFloatBuffer(asum_buffer, queues[0]);
+ auto status = clblasXasum(n,
+ asum_buffer_bis, asum_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(asum_buffer, asum_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
template <typename T>
clblasStatus clblasXamax(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<T>& imax_buffer, const size_t imax_offset,
+ const Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXamax<float>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& imax_buffer, const size_t imax_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float>(context, 2*n);
return clblasiSamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<double>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& imax_buffer, const size_t imax_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double>(context, 2*n);
return clblasiDamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<float2>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& imax_buffer, const size_t imax_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<float2>(context, 2*n);
return clblasiCamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<double2>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& imax_buffer, const size_t imax_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
auto context = queue.GetContext();
auto scratch_buffer = Buffer<double2>(context, 2*n);
return clblasiZamax(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ imax_buffer(), imax_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXamax<half>(const size_t n,
- cl_mem imax_buffer, const size_t imax_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& imax_buffer, const size_t imax_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto imax_buffer_bis = HalfToFloatBuffer(imax_buffer, queues[0]);
+ auto status = clblasXamax(n,
+ imax_buffer_bis, imax_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(imax_buffer, imax_buffer_bis, queues[0]);
+ return status;
}
// =================================================================================================
@@ -773,207 +832,231 @@ clblasStatus clblasXamax<half>(const size_t n,
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgemv(layout, a_transpose,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgemv(layout, a_transpose,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgemv(layout, a_transpose,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgemv(layout, a_transpose,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
const half beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXgemv(layout, a_transpose,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgbmv(layout, a_transpose,
m, n, kl, ku,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgbmv(layout, a_transpose,
m, n, kl, ku,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgbmv(layout, a_transpose,
m, n, kl, ku,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgbmv(layout, a_transpose,
m, n, kl, ku,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
const half beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXgbmv(layout, a_transpose,
+ m, n, kl, ku,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CHEMV/ZHEMV
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChemv(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhemv(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -981,37 +1064,37 @@ clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChbmv(layout, triangle,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhbmv(layout, triangle,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1019,37 +1102,37 @@ clblasStatus clblasXhbmv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
const float2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpmv(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_float2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
const double2 beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpmv(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
cl_double2{{beta.real(), beta.imag()}},
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1057,162 +1140,198 @@ clblasStatus clblasXhpmv(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsymv(layout, triangle,
n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsymv(layout, triangle,
n,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
const half beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXsymv(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSBMV/DSBMV
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsbmv(layout, triangle,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsbmv(layout, triangle,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n, const size_t k,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
const half beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXsbmv(layout, triangle,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSPMV/DSPMV
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
const float beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspmv(layout, triangle,
n,
alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
const double beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspmv(layout, triangle,
n,
alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
beta,
- y_buffer, y_offset, static_cast<int>(y_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem ap_buffer, const size_t ap_offset,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& ap_buffer, const size_t ap_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
const half beta,
- cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto status = clblasXspmv(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc,
+ HalfToFloat(beta),
+ y_buffer_bis, y_offset, y_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(y_buffer, y_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
template <typename T>
clblasStatus clblasXtrmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1220,16 +1339,16 @@ clblasStatus clblasXtrmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1237,16 +1356,16 @@ clblasStatus clblasXtrmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1254,16 +1373,16 @@ clblasStatus clblasXtrmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1271,34 +1390,42 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtrmv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtrmv(layout, triangle, a_transpose, diagonal,
+ n,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
template <typename T>
clblasStatus clblasXtbmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1306,16 +1433,16 @@ clblasStatus clblasXtbmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1323,16 +1450,16 @@ clblasStatus clblasXtbmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1340,16 +1467,16 @@ clblasStatus clblasXtbmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1357,34 +1484,42 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtbmv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtbmv(layout, triangle, a_transpose, diagonal,
+ n, k,
+ a_buffer_bis, a_offset, a_ld,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
template <typename T>
clblasStatus clblasXtpmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& ap_buffer, const size_t ap_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1392,16 +1527,16 @@ clblasStatus clblasXtpmv<float>(const clblasOrder layout, const clblasUplo trian
auto scratch_buffer = Buffer<float>(context, n);
return clblasStpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1409,16 +1544,16 @@ clblasStatus clblasXtpmv<double>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<double>(context, n);
return clblasDtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1426,16 +1561,16 @@ clblasStatus clblasXtpmv<float2>(const clblasOrder layout, const clblasUplo tria
auto scratch_buffer = Buffer<float2>(context, n);
return clblasCtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
auto queue = Queue(queues[0]);
@@ -1443,79 +1578,87 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
auto scratch_buffer = Buffer<double2>(context, n);
return clblasZtpmv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& ap_buffer, const size_t ap_offset,
+ Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto status = clblasXtpmv(layout, triangle, a_transpose, diagonal,
+ n,
+ ap_buffer_bis, ap_offset,
+ x_buffer_bis, x_offset, x_inc,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(x_buffer, x_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
template <typename T>
clblasStatus clblasXtrsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtrsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtrsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrsv(layout, triangle, a_transpose, diagonal,
n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1523,60 +1666,60 @@ clblasStatus clblasXtrsv<double2>(const clblasOrder layout, const clblasUplo tri
template <typename T>
clblasStatus clblasXtbsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtbsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtbsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n, const size_t k,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtbsv(layout, triangle, a_transpose, diagonal,
n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1584,60 +1727,60 @@ clblasStatus clblasXtbsv<double2>(const clblasOrder layout, const clblasUplo tri
template <typename T>
clblasStatus clblasXtpsv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T>& ap_buffer, const size_t ap_offset,
+ Buffer<T>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events);
template <>
clblasStatus clblasXtpsv<float>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& ap_buffer, const size_t ap_offset,
+ Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<double>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& ap_buffer, const size_t ap_offset,
+ Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<float2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& ap_buffer, const size_t ap_offset,
+ Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
template <>
clblasStatus clblasXtpsv<double2>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t n,
- const cl_mem ap_buffer, const size_t ap_offset,
- cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& ap_buffer, const size_t ap_offset,
+ Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtpsv(layout, triangle, a_transpose, diagonal,
n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1645,77 +1788,88 @@ clblasStatus clblasXtpsv<double2>(const clblasOrder layout, const clblasUplo tri
clblasStatus clblasXger(const clblasOrder layout,
const size_t m, const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSger(layout,
m, n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXger(const clblasOrder layout,
const size_t m, const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDger(layout,
m, n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXger(const clblasOrder layout,
const size_t m, const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXger(layout,
+ m, n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CGERU/ZGERU
clblasStatus clblasXgeru(const clblasOrder layout,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgeru(layout,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgeru(const clblasOrder layout,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgeru(layout,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1723,33 +1877,33 @@ clblasStatus clblasXgeru(const clblasOrder layout,
clblasStatus clblasXgerc(const clblasOrder layout,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgerc(layout,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgerc(const clblasOrder layout,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgerc(layout,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1757,29 +1911,29 @@ clblasStatus clblasXgerc(const clblasOrder layout,
clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1787,29 +1941,29 @@ clblasStatus clblasXher(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1817,33 +1971,33 @@ clblasStatus clblasXhpr(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher2(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher2(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1851,33 +2005,33 @@ clblasStatus clblasXher2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChpr2(layout, triangle,
n,
cl_float2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double2 alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double2>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double2>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double2>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhpr2(layout, triangle,
n,
cl_double2{{alpha.real(), alpha.imag()}},
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -1885,166 +2039,206 @@ clblasStatus clblasXhpr2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXsyr(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSPR/DSPR
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<float>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<double>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspr(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ Buffer<half>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto status = clblasXspr(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ ap_buffer_bis, ap_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSYR2/DSYR2
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- a_buffer, a_offset, a_ld,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ a_buffer(), a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto status = clblasXsyr2(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ a_buffer_bis, a_offset, a_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(a_buffer, a_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSPR2/DSPR2
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const float alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<float>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<float>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<float>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSspr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const double alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
+ const Buffer<double>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<double>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<double>& ap_buffer, const size_t ap_offset,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDspr2(layout, triangle,
n,
alpha,
- x_buffer, x_offset, static_cast<int>(x_inc),
- y_buffer, y_offset, static_cast<int>(y_inc),
- ap_buffer, ap_offset,
+ x_buffer(), x_offset, static_cast<int>(x_inc),
+ y_buffer(), y_offset, static_cast<int>(y_inc),
+ ap_buffer(), ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
const size_t n,
const half alpha,
- const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
- const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
- cl_mem ap_buffer, const size_t ap_offset,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ const Buffer<half>& x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<half>& y_buffer, const size_t y_offset, const size_t y_inc,
+ Buffer<half>& ap_buffer, const size_t ap_offset,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto x_buffer_bis = HalfToFloatBuffer(x_buffer, queues[0]);
+ auto y_buffer_bis = HalfToFloatBuffer(y_buffer, queues[0]);
+ auto ap_buffer_bis = HalfToFloatBuffer(ap_buffer, queues[0]);
+ auto status = clblasXspr2(layout, triangle,
+ n,
+ HalfToFloat(alpha),
+ x_buffer_bis, x_offset, x_inc,
+ y_buffer_bis, y_offset, y_inc,
+ ap_buffer_bis, ap_offset,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(ap_buffer, ap_buffer_bis, queues[0]);
+ return status;
}
// =================================================================================================
@@ -2055,207 +2249,231 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSgemm(layout, a_transpose, b_transpose,
m, n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDgemm(layout, a_transpose, b_transpose,
m, n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCgemm(layout, a_transpose, b_transpose,
m, n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZgemm(layout, a_transpose, b_transpose,
m, n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
const size_t m, const size_t n, const size_t k,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
const half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXgemm(layout, a_transpose, b_transpose,
+ m, n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsymm(layout, side, triangle,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsymm(layout, side, triangle,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsymm(layout, side, triangle,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsymm(layout, side, triangle,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
const half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsymm(layout, side, triangle,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CHEMM/ZHEMM
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasChemm(layout, side, triangle,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZhemm(layout, side, triangle,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2263,109 +2481,119 @@ clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyrk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyrk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsyrk(layout, triangle, a_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsyrk(layout, triangle, a_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
const half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsyrk(layout, triangle, a_transpose,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CHERK/ZHERK
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCherk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZherk(layout, triangle, a_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
+ a_buffer(), a_offset, a_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2373,122 +2601,134 @@ clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, co
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasSsyr2k(layout, triangle, ab_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDsyr2k(layout, triangle, ab_transpose,
n, k,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCsyr2k(layout, triangle, ab_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_float2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZsyr2k(layout, triangle, ab_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
cl_double2{{beta.real(), beta.imag()}},
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
const half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<half>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto c_buffer_bis = HalfToFloatBuffer(c_buffer, queues[0]);
+ auto status = clblasXsyr2k(layout, triangle, ab_transpose,
+ n, k,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ HalfToFloat(beta),
+ c_buffer_bis, c_offset, c_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(c_buffer, c_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for CHER2K/ZHER2K
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<float2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCher2k(layout, triangle, ab_transpose,
n, k,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
const size_t n, const size_t k,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ Buffer<double2>& c_buffer, const size_t c_offset, const size_t c_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZher2k(layout, triangle, ab_transpose,
n, k,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
beta,
- c_buffer, c_offset, c_ld,
+ c_buffer(), c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
@@ -2496,134 +2736,152 @@ clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, c
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrmm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto status = clblasXtrmm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis, queues[0]);
+ return status;
}
// Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasStrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasDtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<float2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<float2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasCtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_float2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const Buffer<double2>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<double2>& b_buffer, const size_t b_offset, const size_t b_ld,
cl_uint num_queues, cl_command_queue *queues,
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
return clblasZtrsm(layout, side, triangle, a_transpose, diagonal,
m, n,
cl_double2{{alpha.real(), alpha.imag()}},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
+ a_buffer(), a_offset, a_ld,
+ b_buffer(), b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
const size_t m, const size_t n,
const half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- cl_uint num_queues, cl_command_queue *queues,
- cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
- return clblasNotImplemented;
+ const Buffer<half>& a_buffer, const size_t a_offset, const size_t a_ld,
+ Buffer<half>& b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_uint num_queues, cl_command_queue *queues,
+ cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
+ auto a_buffer_bis = HalfToFloatBuffer(a_buffer, queues[0]);
+ auto b_buffer_bis = HalfToFloatBuffer(b_buffer, queues[0]);
+ auto status = clblasXtrsm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ HalfToFloat(alpha),
+ a_buffer_bis, a_offset, a_ld,
+ b_buffer_bis, b_offset, b_ld,
+ num_queues, queues, num_wait_events, wait_events, events);
+ FloatToHalfBuffer(b_buffer, b_buffer_bis, queues[0]);
+ return status;
}
// =================================================================================================