summaryrefslogtreecommitdiff
path: root/test/routines
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2017-03-05 15:04:16 +0100
committerCedric Nugteren <web@cedricnugteren.nl>2017-03-05 15:04:16 +0100
commitcdf354f89524ed88d4f9358004e5a8eabd9ce286 (patch)
tree644b8a67d2e34663335a5a7faa2052e13fe6e1aa /test/routines
parent7f14b11f1e240f12f5f6bf93cbbeab26001e9a5c (diff)
Adjusted the test-infrastructure to support testing of batched-versions of routines
Diffstat (limited to 'test/routines')
-rw-r--r--test/routines/level1/xamax.hpp20
-rw-r--r--test/routines/level1/xasum.hpp20
-rw-r--r--test/routines/level1/xaxpy.hpp20
-rw-r--r--test/routines/level1/xcopy.hpp20
-rw-r--r--test/routines/level1/xdot.hpp26
-rw-r--r--test/routines/level1/xdotc.hpp26
-rw-r--r--test/routines/level1/xdotu.hpp26
-rw-r--r--test/routines/level1/xnrm2.hpp20
-rw-r--r--test/routines/level1/xscal.hpp14
-rw-r--r--test/routines/level1/xswap.hpp22
-rw-r--r--test/routines/level2/xgbmv.hpp26
-rw-r--r--test/routines/level2/xgemv.hpp26
-rw-r--r--test/routines/level2/xger.hpp26
-rw-r--r--test/routines/level2/xgerc.hpp26
-rw-r--r--test/routines/level2/xgeru.hpp26
-rw-r--r--test/routines/level2/xhbmv.hpp26
-rw-r--r--test/routines/level2/xhemv.hpp26
-rw-r--r--test/routines/level2/xher.hpp20
-rw-r--r--test/routines/level2/xher2.hpp26
-rw-r--r--test/routines/level2/xhpmv.hpp26
-rw-r--r--test/routines/level2/xhpr.hpp20
-rw-r--r--test/routines/level2/xhpr2.hpp26
-rw-r--r--test/routines/level2/xsbmv.hpp26
-rw-r--r--test/routines/level2/xspmv.hpp26
-rw-r--r--test/routines/level2/xspr.hpp20
-rw-r--r--test/routines/level2/xspr2.hpp26
-rw-r--r--test/routines/level2/xsymv.hpp26
-rw-r--r--test/routines/level2/xsyr.hpp20
-rw-r--r--test/routines/level2/xsyr2.hpp26
-rw-r--r--test/routines/level2/xtbmv.hpp20
-rw-r--r--test/routines/level2/xtpmv.hpp20
-rw-r--r--test/routines/level2/xtrmv.hpp20
-rw-r--r--test/routines/level2/xtrsv.hpp20
-rw-r--r--test/routines/level3/xgemm.hpp26
-rw-r--r--test/routines/level3/xhemm.hpp26
-rw-r--r--test/routines/level3/xher2k.hpp26
-rw-r--r--test/routines/level3/xherk.hpp20
-rw-r--r--test/routines/level3/xsymm.hpp26
-rw-r--r--test/routines/level3/xsyr2k.hpp26
-rw-r--r--test/routines/level3/xsyrk.hpp20
-rw-r--r--test/routines/level3/xtrmm.hpp20
-rw-r--r--test/routines/level3/xtrsm.hpp20
-rw-r--r--test/routines/levelx/xinvert.hpp14
-rw-r--r--test/routines/levelx/xomatcopy.hpp14
44 files changed, 501 insertions, 501 deletions
diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp
index a22f681f..faffff33 100644
--- a/test/routines/level1/xamax.hpp
+++ b/test/routines/level1/xamax.hpp
@@ -74,12 +74,12 @@ class TestXamax {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Amax<T>(args.n,
- buffers.scalar(), args.imax_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].scalar(), args.imax_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -87,12 +87,12 @@ class TestXamax {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXamax<T>(args.n,
- buffers.scalar, args.imax_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].scalar, args.imax_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -101,15 +101,15 @@ class TestXamax {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXamax(args.n,
scalar_cpu, args.imax_offset,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp
index 64377189..fb2c9f1a 100644
--- a/test/routines/level1/xasum.hpp
+++ b/test/routines/level1/xasum.hpp
@@ -74,12 +74,12 @@ class TestXasum {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Asum<T>(args.n,
- buffers.scalar(), args.asum_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].scalar(), args.asum_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -87,12 +87,12 @@ class TestXasum {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXasum<T>(args.n,
- buffers.scalar, args.asum_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].scalar, args.asum_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -101,15 +101,15 @@ class TestXasum {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXasum(args.n,
scalar_cpu, args.asum_offset,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index eba067c0..1c74f67f 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -75,12 +75,12 @@ class TestXaxpy {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Axpy(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -88,12 +88,12 @@ class TestXaxpy {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -102,15 +102,15 @@ class TestXaxpy {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXaxpy(args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp
index 753f0da5..55980f30 100644
--- a/test/routines/level1/xcopy.hpp
+++ b/test/routines/level1/xcopy.hpp
@@ -74,12 +74,12 @@ class TestXcopy {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Copy<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -87,12 +87,12 @@ class TestXcopy {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXcopy<T>(args.n,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -101,15 +101,15 @@ class TestXcopy {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXcopy(args.n,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp
index 8127247d..1ea69c17 100644
--- a/test/routines/level1/xdot.hpp
+++ b/test/routines/level1/xdot.hpp
@@ -78,13 +78,13 @@ class TestXdot {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dot<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].scalar(), args.dot_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -92,13 +92,13 @@ class TestXdot {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdot<T>(args.n,
- buffers.scalar, args.dot_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].scalar, args.dot_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,18 +107,18 @@ class TestXdot {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXdot(args.n,
scalar_cpu, args.dot_offset,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp
index 96d97dc4..00dcf7c2 100644
--- a/test/routines/level1/xdotc.hpp
+++ b/test/routines/level1/xdotc.hpp
@@ -78,13 +78,13 @@ class TestXdotc {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dotc<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].scalar(), args.dot_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -92,13 +92,13 @@ class TestXdotc {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotc<T>(args.n,
- buffers.scalar, args.dot_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].scalar, args.dot_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,18 +107,18 @@ class TestXdotc {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXdotc(args.n,
scalar_cpu, args.dot_offset,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp
index 70c7fceb..512de985 100644
--- a/test/routines/level1/xdotu.hpp
+++ b/test/routines/level1/xdotu.hpp
@@ -78,13 +78,13 @@ class TestXdotu {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dotu<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].scalar(), args.dot_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -92,13 +92,13 @@ class TestXdotu {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXdotu<T>(args.n,
- buffers.scalar, args.dot_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].scalar, args.dot_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,18 +107,18 @@ class TestXdotu {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXdotu(args.n,
scalar_cpu, args.dot_offset,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp
index ce33fe59..20f75226 100644
--- a/test/routines/level1/xnrm2.hpp
+++ b/test/routines/level1/xnrm2.hpp
@@ -74,12 +74,12 @@ class TestXnrm2 {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Nrm2<T>(args.n,
- buffers.scalar(), args.nrm2_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].scalar(), args.nrm2_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -87,12 +87,12 @@ class TestXnrm2 {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXnrm2<T>(args.n,
- buffers.scalar, args.nrm2_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].scalar, args.nrm2_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -101,15 +101,15 @@ class TestXnrm2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXnrm2(args.n,
scalar_cpu, args.nrm2_offset,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers[0].scalar.Write(queue, args.scalar_size, scalar_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp
index d89688b4..e2600834 100644
--- a/test/routines/level1/xscal.hpp
+++ b/test/routines/level1/xscal.hpp
@@ -71,11 +71,11 @@ class TestXscal {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Scal(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -83,11 +83,11 @@ class TestXscal {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXscal(args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -96,12 +96,12 @@ class TestXscal {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXscal(args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp
index 49b0d3d0..b9f06eb7 100644
--- a/test/routines/level1/xswap.hpp
+++ b/test/routines/level1/xswap.hpp
@@ -74,12 +74,12 @@ class TestXswap {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Swap<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -87,12 +87,12 @@ class TestXswap {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXswap<T>(args.n,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -101,16 +101,16 @@ class TestXswap {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXswap(args.n,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp
index f371b9a7..57c16104 100644
--- a/test/routines/level2/xgbmv.hpp
+++ b/test/routines/level2/xgbmv.hpp
@@ -86,14 +86,14 @@ class TestXgbmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gbmv(args.layout, args.a_transpose,
args.m, args.n, args.kl, args.ku, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -101,15 +101,15 @@ class TestXgbmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.kl, args.ku, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -118,20 +118,20 @@ class TestXgbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXgbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
args.m, args.n, args.kl, args.ku, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp
index 2442be4c..3c56c405 100644
--- a/test/routines/level2/xgemv.hpp
+++ b/test/routines/level2/xgemv.hpp
@@ -86,14 +86,14 @@ class TestXgemv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gemv(args.layout, args.a_transpose,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -101,15 +101,15 @@ class TestXgemv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
args.m, args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -118,20 +118,20 @@ class TestXgemv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXgemv(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
args.m, args.n, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp
index 3e7ccbc3..f9a6fefd 100644
--- a/test/routines/level2/xger.hpp
+++ b/test/routines/level2/xger.hpp
@@ -82,14 +82,14 @@ class TestXger {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Ger(args.layout,
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -97,14 +97,14 @@ class TestXger {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXger(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -113,19 +113,19 @@ class TestXger {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXger(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp
index d880ae1f..ddc9030a 100644
--- a/test/routines/level2/xgerc.hpp
+++ b/test/routines/level2/xgerc.hpp
@@ -82,14 +82,14 @@ class TestXgerc {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gerc(args.layout,
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -97,14 +97,14 @@ class TestXgerc {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgerc(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -113,19 +113,19 @@ class TestXgerc {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXgerc(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp
index 1735e42a..8d5b8589 100644
--- a/test/routines/level2/xgeru.hpp
+++ b/test/routines/level2/xgeru.hpp
@@ -82,14 +82,14 @@ class TestXgeru {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Geru(args.layout,
args.m, args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -97,14 +97,14 @@ class TestXgeru {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgeru(convertToCLBLAS(args.layout),
args.m, args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -113,19 +113,19 @@ class TestXgeru {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXgeru(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp
index 99538bf1..50130359 100644
--- a/test/routines/level2/xhbmv.hpp
+++ b/test/routines/level2/xhbmv.hpp
@@ -80,14 +80,14 @@ class TestXhbmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hbmv(args.layout, args.triangle,
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXhbmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXhbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXhbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.kl, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp
index 3792cb66..f69b031c 100644
--- a/test/routines/level2/xhemv.hpp
+++ b/test/routines/level2/xhemv.hpp
@@ -80,14 +80,14 @@ class TestXhemv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hemv(args.layout, args.triangle,
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXhemv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhemv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXhemv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXhemv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp
index c58eb189..c3d809bf 100644
--- a/test/routines/level2/xher.hpp
+++ b/test/routines/level2/xher.hpp
@@ -76,13 +76,13 @@ class TestXher {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Her(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -90,14 +90,14 @@ class TestXher {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXher(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -106,17 +106,17 @@ class TestXher {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXher(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp
index 8a7eb0b6..7ddf9ed1 100644
--- a/test/routines/level2/xher2.hpp
+++ b/test/routines/level2/xher2.hpp
@@ -80,14 +80,14 @@ class TestXher2 {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Her2(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXher2 {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXher2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXher2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXher2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp
index 0862b619..7fae80b8 100644
--- a/test/routines/level2/xhpmv.hpp
+++ b/test/routines/level2/xhpmv.hpp
@@ -80,14 +80,14 @@ class TestXhpmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hpmv(args.layout, args.triangle,
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXhpmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhpmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat, args.ap_offset,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXhpmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXhpmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
ap_mat_cpu, args.ap_offset,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp
index 5b454174..a46cb8e6 100644
--- a/test/routines/level2/xhpr.hpp
+++ b/test/routines/level2/xhpr.hpp
@@ -76,13 +76,13 @@ class TestXhpr {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hpr(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].ap_mat(), args.ap_offset,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -90,14 +90,14 @@ class TestXhpr {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhpr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -106,17 +106,17 @@ class TestXhpr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXhpr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp
index b770da2e..08f12768 100644
--- a/test/routines/level2/xhpr2.hpp
+++ b/test/routines/level2/xhpr2.hpp
@@ -80,14 +80,14 @@ class TestXhpr2 {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hpr2(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].ap_mat(), args.ap_offset,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXhpr2 {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhpr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXhpr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXhpr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp
index 7a836170..a45dbe8f 100644
--- a/test/routines/level2/xsbmv.hpp
+++ b/test/routines/level2/xsbmv.hpp
@@ -80,14 +80,14 @@ class TestXsbmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Sbmv(args.layout, args.triangle,
args.n, args.kl, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXsbmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsbmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.kl, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXsbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXsbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.kl, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp
index 352c8cfd..a455f652 100644
--- a/test/routines/level2/xspmv.hpp
+++ b/test/routines/level2/xspmv.hpp
@@ -80,14 +80,14 @@ class TestXspmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Spmv(args.layout, args.triangle,
args.n, args.alpha,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXspmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXspmv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.ap_mat, args.ap_offset,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXspmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXspmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
ap_mat_cpu, args.ap_offset,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp
index 988bcdc2..ab9ab85f 100644
--- a/test/routines/level2/xspr.hpp
+++ b/test/routines/level2/xspr.hpp
@@ -76,13 +76,13 @@ class TestXspr {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Spr(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].ap_mat(), args.ap_offset,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -90,14 +90,14 @@ class TestXspr {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXspr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -106,17 +106,17 @@ class TestXspr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXspr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp
index ee517bc1..a73975a5 100644
--- a/test/routines/level2/xspr2.hpp
+++ b/test/routines/level2/xspr2.hpp
@@ -80,14 +80,14 @@ class TestXspr2 {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Spr2(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].ap_mat(), args.ap_offset,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXspr2 {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXspr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].ap_mat, args.ap_offset,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXspr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXspr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp
index 5eecfb74..c93492ed 100644
--- a/test/routines/level2/xsymv.hpp
+++ b/test/routines/level2/xsymv.hpp
@@ -80,14 +80,14 @@ class TestXsymv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Symv(args.layout, args.triangle,
args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXsymv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsymv(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc, args.beta,
- buffers.y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXsymv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXsymv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc, args.beta,
y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers[0].y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp
index ac4ee1ff..ac2c5e98 100644
--- a/test/routines/level2/xsyr.hpp
+++ b/test/routines/level2/xsyr.hpp
@@ -76,13 +76,13 @@ class TestXsyr {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Syr(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -90,14 +90,14 @@ class TestXsyr {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyr(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -106,17 +106,17 @@ class TestXsyr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXsyr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp
index 43644883..9f8d315b 100644
--- a/test/routines/level2/xsyr2.hpp
+++ b/test/routines/level2/xsyr2.hpp
@@ -80,14 +80,14 @@ class TestXsyr2 {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Syr2(args.layout, args.triangle,
args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
+ buffers[0].y_vec(), args.y_offset, args.y_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -95,15 +95,15 @@ class TestXsyr2 {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyr2(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
args.n, args.alpha,
- buffers.x_vec, args.x_offset, args.x_inc,
- buffers.y_vec, args.y_offset, args.y_inc,
- buffers.a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
+ buffers[0].y_vec, args.y_offset, args.y_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,20 +112,20 @@ class TestXsyr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].y_vec.Read(queue, args.y_size, y_vec_cpu);
cblasXsyr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
x_vec_cpu, args.x_offset, args.x_inc,
y_vec_cpu, args.y_offset, args.y_inc,
a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers[0].a_mat.Write(queue, args.a_size, a_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp
index ab9244af..2d964fda 100644
--- a/test/routines/level2/xtbmv.hpp
+++ b/test/routines/level2/xtbmv.hpp
@@ -75,13 +75,13 @@ class TestXtbmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Tbmv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal,
args.n, args.kl,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -89,7 +89,7 @@ class TestXtbmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtbmv<T>(convertToCLBLAS(args.layout),
@@ -97,8 +97,8 @@ class TestXtbmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n, args.kl,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,11 +107,11 @@ class TestXtbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXtbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
@@ -119,7 +119,7 @@ class TestXtbmv {
args.n, args.kl,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp
index 3821e1a4..fcfd86bf 100644
--- a/test/routines/level2/xtpmv.hpp
+++ b/test/routines/level2/xtpmv.hpp
@@ -75,13 +75,13 @@ class TestXtpmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Tpmv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal,
args.n,
- buffers.ap_mat(), args.ap_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].ap_mat(), args.ap_offset,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -89,7 +89,7 @@ class TestXtpmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtpmv<T>(convertToCLBLAS(args.layout),
@@ -97,8 +97,8 @@ class TestXtpmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.ap_mat, args.ap_offset,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].ap_mat, args.ap_offset,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,11 +107,11 @@ class TestXtpmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXtpmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
@@ -119,7 +119,7 @@ class TestXtpmv {
args.n,
ap_mat_cpu, args.ap_offset,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp
index 7211c757..4e209584 100644
--- a/test/routines/level2/xtrmv.hpp
+++ b/test/routines/level2/xtrmv.hpp
@@ -75,13 +75,13 @@ class TestXtrmv {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Trmv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal,
args.n,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -89,7 +89,7 @@ class TestXtrmv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtrmv<T>(convertToCLBLAS(args.layout),
@@ -97,8 +97,8 @@ class TestXtrmv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -107,11 +107,11 @@ class TestXtrmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXtrmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
@@ -119,7 +119,7 @@ class TestXtrmv {
args.n,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index 78b9672f..090684b1 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -90,13 +90,13 @@ class TestXtrsv {
}
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Trsv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal,
args.n,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].x_vec(), args.x_offset, args.x_inc,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -104,7 +104,7 @@ class TestXtrsv {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtrsv<T>(convertToCLBLAS(args.layout),
@@ -112,8 +112,8 @@ class TestXtrsv {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.n,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.x_vec, args.x_offset, args.x_inc,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].x_vec, args.x_offset, args.x_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -122,11 +122,11 @@ class TestXtrsv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].x_vec.Read(queue, args.x_size, x_vec_cpu);
cblasXtrsv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
@@ -134,7 +134,7 @@ class TestXtrsv {
args.n,
a_mat_cpu, args.a_offset, args.a_ld,
x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers[0].x_vec.Write(queue, args.x_size, x_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index 1b12fb1c..5b220889 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -88,14 +88,14 @@ class TestXgemm {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gemm(args.layout, args.a_transpose, args.b_transpose,
args.m, args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -103,16 +103,16 @@ class TestXgemm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgemm(convertToCLBLAS(args.layout),
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.b_transpose),
args.m, args.n, args.k, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -121,13 +121,13 @@ class TestXgemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXgemm(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.b_transpose),
@@ -135,7 +135,7 @@ class TestXgemm {
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp
index 76550b15..e6e8724f 100644
--- a/test/routines/level3/xhemm.hpp
+++ b/test/routines/level3/xhemm.hpp
@@ -88,14 +88,14 @@ class TestXhemm {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Hemm(args.layout, args.side, args.triangle,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -103,16 +103,16 @@ class TestXhemm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXhemm(convertToCLBLAS(args.layout),
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -121,13 +121,13 @@ class TestXhemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXhemm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
@@ -135,7 +135,7 @@ class TestXhemm {
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp
index 5ca3aac6..749eca11 100644
--- a/test/routines/level3/xher2k.hpp
+++ b/test/routines/level3/xher2k.hpp
@@ -86,15 +86,15 @@ class TestXher2k {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto alpha2 = T{args.alpha, args.alpha};
auto status = Her2k(args.layout, args.triangle, args.a_transpose,
args.n, args.k, alpha2,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -102,7 +102,7 @@ class TestXher2k {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto alpha2 = T{args.alpha, args.alpha};
@@ -110,9 +110,9 @@ class TestXher2k {
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, alpha2,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -121,13 +121,13 @@ class TestXher2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
auto alpha2 = T{args.alpha, args.alpha};
cblasXher2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
@@ -136,7 +136,7 @@ class TestXher2k {
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index e93d887a..e9193847 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -79,13 +79,13 @@ class TestXherk {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Herk(args.layout, args.triangle, args.a_transpose,
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -93,15 +93,15 @@ class TestXherk {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXherk(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -110,18 +110,18 @@ class TestXherk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<U> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXherk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp
index 9d127e26..bcd74fda 100644
--- a/test/routines/level3/xsymm.hpp
+++ b/test/routines/level3/xsymm.hpp
@@ -88,14 +88,14 @@ class TestXsymm {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Symm(args.layout, args.side, args.triangle,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -103,16 +103,16 @@ class TestXsymm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsymm(convertToCLBLAS(args.layout),
convertToCLBLAS(args.side),
convertToCLBLAS(args.triangle),
args.m, args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -121,13 +121,13 @@ class TestXsymm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXsymm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
@@ -135,7 +135,7 @@ class TestXsymm {
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp
index d1bdac56..c722e0cf 100644
--- a/test/routines/level3/xsyr2k.hpp
+++ b/test/routines/level3/xsyr2k.hpp
@@ -86,14 +86,14 @@ class TestXsyr2k {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Syr2k(args.layout, args.triangle, args.a_transpose,
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -101,16 +101,16 @@ class TestXsyr2k {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyr2k(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -119,13 +119,13 @@ class TestXsyr2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXsyr2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
@@ -133,7 +133,7 @@ class TestXsyr2k {
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp
index 1330924e..7d5c2039 100644
--- a/test/routines/level3/xsyrk.hpp
+++ b/test/routines/level3/xsyrk.hpp
@@ -79,13 +79,13 @@ class TestXsyrk {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Syrk(args.layout, args.triangle, args.a_transpose,
args.n, args.k, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
- buffers.c_mat(), args.c_offset, args.c_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld, args.beta,
+ buffers[0].c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -93,15 +93,15 @@ class TestXsyrk {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyrk(convertToCLBLAS(args.layout),
convertToCLBLAS(args.triangle),
convertToCLBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld, args.beta,
- buffers.c_mat, args.c_offset, args.c_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers[0].c_mat, args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -110,18 +110,18 @@ class TestXsyrk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].c_mat.Read(queue, args.c_size, c_mat_cpu);
cblasXsyrk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld, args.beta,
c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers[0].c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp
index 7c5bd842..50cca6f8 100644
--- a/test/routines/level3/xtrmm.hpp
+++ b/test/routines/level3/xtrmm.hpp
@@ -79,13 +79,13 @@ class TestXtrmm {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -93,7 +93,7 @@ class TestXtrmm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtrmm(convertToCLBLAS(args.layout),
@@ -102,8 +102,8 @@ class TestXtrmm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.m, args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -112,11 +112,11 @@ class TestXtrmm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
cblasXtrmm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
@@ -125,7 +125,7 @@ class TestXtrmm {
args.m, args.n, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers[0].b_mat.Write(queue, args.b_size, b_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index a70ef03f..91f91d0b 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -91,13 +91,13 @@ class TestXtrsm {
}
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Trsm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -105,7 +105,7 @@ class TestXtrsm {
// Describes how to run the clBLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CLBLAS
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXtrsm(convertToCLBLAS(args.layout),
@@ -114,8 +114,8 @@ class TestXtrsm {
convertToCLBLAS(args.a_transpose),
convertToCLBLAS(args.diagonal),
args.m, args.n, args.alpha,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat, args.b_offset, args.b_ld,
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat, args.b_offset, args.b_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
@@ -124,11 +124,11 @@ class TestXtrsm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ buffers[0].a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers[0].b_mat.Read(queue, args.b_size, b_mat_cpu);
cblasXtrsm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
@@ -137,7 +137,7 @@ class TestXtrsm {
args.m, args.n, args.alpha,
a_mat_cpu, args.a_offset, args.a_ld,
b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers[0].b_mat.Write(queue, args.b_size, b_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index b470dbf3..2cb1b2ce 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -173,14 +173,14 @@ class TestXinvert {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
try {
auto event = cl_event{};
auto inverter = Xinvert<T>(queue, &event);
inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal,
args.n, args.m,
- buffers.a_mat, args.a_offset, args.a_ld,
- buffers.b_mat);
+ buffers[0].a_mat, args.a_offset, args.a_ld,
+ buffers[0].b_mat);
clWaitForEvents(1, &event);
clReleaseEvent(event);
} catch (...) { return DispatchException(); }
@@ -189,12 +189,12 @@ class TestXinvert {
// Describes how to run a naive version of the routine (for correctness/performance comparison).
// Note that a proper clBLAS or CPU BLAS comparison is not available for non-BLAS routines.
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
+ return RunReference(args, buffers[0], queue);
}
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
+ return RunReference(args, buffers[0], queue);
}
// Describes how to download the results of the computation (more importantly: which buffer)
diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp
index d1064d0c..69f0b2b6 100644
--- a/test/routines/levelx/xomatcopy.hpp
+++ b/test/routines/levelx/xomatcopy.hpp
@@ -133,13 +133,13 @@ class TestXomatcopy {
std::vector<T>&, std::vector<T>&) {} // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Omatcopy<T>(args.layout, args.a_transpose,
args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.b_mat(), args.b_offset, args.b_ld,
+ buffers[0].a_mat(), args.a_offset, args.a_ld,
+ buffers[0].b_mat(), args.b_offset, args.b_ld,
&queue_plain, &event);
if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
return status;
@@ -147,12 +147,12 @@ class TestXomatcopy {
// Describes how to run a naive version of the routine (for correctness/performance comparison).
// Note that a proper clBLAS or CPU BLAS comparison is not available for non-BLAS routines.
- static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference1(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
+ return RunReference(args, buffers[0], queue);
}
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference2(const Arguments<T> &args, std::vector<Buffers<T>> &buffers, Queue &queue) {
+ return RunReference(args, buffers[0], queue);
}
// Describes how to download the results of the computation (more importantly: which buffer)