summary | refs | log | tree | commit | diff
path: root/test/routines
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-04-01 13:36:24 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-04-01 13:36:24 +0200
commit b84d2296b87ac212474af855d916b12adf96bdb7 (patch)
tree 0f2e85e1e1acef1d22f046499dd0b8a30e5da4f9 /test/routines
parent a98c00a2671b8981579f3a73dca8fb3365a95e53 (diff)
Separated host-device and device-host memory copies from execution of the CBLAS reference code; for fair timing and code de-duplication
Diffstat (limited to 'test/routines')
-rw-r--r-- test/routines/level1/xamax.hpp 13
-rw-r--r-- test/routines/level1/xasum.hpp 13
-rw-r--r-- test/routines/level1/xaxpy.hpp 13
-rw-r--r-- test/routines/level1/xcopy.hpp 13
-rw-r--r-- test/routines/level1/xdot.hpp 17
-rw-r--r-- test/routines/level1/xdotc.hpp 17
-rw-r--r-- test/routines/level1/xdotu.hpp 17
-rw-r--r-- test/routines/level1/xnrm2.hpp 13
-rw-r--r-- test/routines/level1/xscal.hpp 9
-rw-r--r-- test/routines/level1/xswap.hpp 14
-rw-r--r-- test/routines/level2/xgbmv.hpp 17
-rw-r--r-- test/routines/level2/xgemv.hpp 17
-rw-r--r-- test/routines/level2/xger.hpp 17
-rw-r--r-- test/routines/level2/xgerc.hpp 17
-rw-r--r-- test/routines/level2/xgeru.hpp 17
-rw-r--r-- test/routines/level2/xhbmv.hpp 17
-rw-r--r-- test/routines/level2/xhemv.hpp 17
-rw-r--r-- test/routines/level2/xher.hpp 13
-rw-r--r-- test/routines/level2/xher2.hpp 17
-rw-r--r-- test/routines/level2/xhpmv.hpp 17
-rw-r--r-- test/routines/level2/xhpr.hpp 13
-rw-r--r-- test/routines/level2/xhpr2.hpp 17
-rw-r--r-- test/routines/level2/xsbmv.hpp 17
-rw-r--r-- test/routines/level2/xspmv.hpp 17
-rw-r--r-- test/routines/level2/xspr.hpp 13
-rw-r--r-- test/routines/level2/xspr2.hpp 17
-rw-r--r-- test/routines/level2/xsymv.hpp 17
-rw-r--r-- test/routines/level2/xsyr.hpp 13
-rw-r--r-- test/routines/level2/xsyr2.hpp 17
-rw-r--r-- test/routines/level2/xtbmv.hpp 13
-rw-r--r-- test/routines/level2/xtpmv.hpp 13
-rw-r--r-- test/routines/level2/xtrmv.hpp 13
-rw-r--r-- test/routines/level2/xtrsv.hpp 13
-rw-r--r-- test/routines/level3/xgemm.hpp 17
-rw-r--r-- test/routines/level3/xhemm.hpp 17
-rw-r--r-- test/routines/level3/xher2k.hpp 17
-rw-r--r-- test/routines/level3/xherk.hpp 13
-rw-r--r-- test/routines/level3/xsymm.hpp 17
-rw-r--r-- test/routines/level3/xsyr2k.hpp 17
-rw-r--r-- test/routines/level3/xsyrk.hpp 13
-rw-r--r-- test/routines/level3/xtrmm.hpp 13
-rw-r--r-- test/routines/level3/xtrsm.hpp 13
-rw-r--r-- test/routines/levelx/xaxpybatched.hpp 13
-rw-r--r-- test/routines/levelx/xgemmbatched.hpp 17
-rw-r--r-- test/routines/levelx/xinvert.hpp 56
-rw-r--r-- test/routines/levelx/xomatcopy.hpp 43
46 files changed, 289 insertions, 475 deletions
diff --git a/test/routines/level1/xamax.hpp b/test/routines/level1/xamax.hpp
index a22f681f..2e844f2c 100644
--- a/test/routines/level1/xamax.hpp
+++ b/test/routines/level1/xamax.hpp
@@ -43,6 +43,8 @@ class TestXamax {
kArgXInc,
kArgXOffset, kArgImaxOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -101,15 +103,10 @@ class TestXamax {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXamax(args.n,
- scalar_cpu, args.imax_offset,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.imax_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xasum.hpp b/test/routines/level1/xasum.hpp
index 64377189..8488bfc6 100644
--- a/test/routines/level1/xasum.hpp
+++ b/test/routines/level1/xasum.hpp
@@ -43,6 +43,8 @@ class TestXasum {
kArgXInc,
kArgXOffset, kArgAsumOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -101,15 +103,10 @@ class TestXasum {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXasum(args.n,
- scalar_cpu, args.asum_offset,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.asum_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xaxpy.hpp b/test/routines/level1/xaxpy.hpp
index eba067c0..cc7d251a 100644
--- a/test/routines/level1/xaxpy.hpp
+++ b/test/routines/level1/xaxpy.hpp
@@ -44,6 +44,8 @@ class TestXaxpy {
kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -102,15 +104,10 @@ class TestXaxpy {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXaxpy(args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xcopy.hpp b/test/routines/level1/xcopy.hpp
index 753f0da5..0dbf0f3d 100644
--- a/test/routines/level1/xcopy.hpp
+++ b/test/routines/level1/xcopy.hpp
@@ -43,6 +43,8 @@ class TestXcopy {
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -101,15 +103,10 @@ class TestXcopy {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXcopy(args.n,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdot.hpp b/test/routines/level1/xdot.hpp
index 8127247d..bdf2e721 100644
--- a/test/routines/level1/xdot.hpp
+++ b/test/routines/level1/xdot.hpp
@@ -43,6 +43,8 @@ class TestXdot {
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset, kArgDotOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,18 +109,11 @@ class TestXdot {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXdot(args.n,
- scalar_cpu, args.dot_offset,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.dot_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdotc.hpp b/test/routines/level1/xdotc.hpp
index 96d97dc4..2cc71b93 100644
--- a/test/routines/level1/xdotc.hpp
+++ b/test/routines/level1/xdotc.hpp
@@ -43,6 +43,8 @@ class TestXdotc {
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset, kArgDotOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,18 +109,11 @@ class TestXdotc {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXdotc(args.n,
- scalar_cpu, args.dot_offset,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.dot_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xdotu.hpp b/test/routines/level1/xdotu.hpp
index 70c7fceb..272e1e31 100644
--- a/test/routines/level1/xdotu.hpp
+++ b/test/routines/level1/xdotu.hpp
@@ -43,6 +43,8 @@ class TestXdotu {
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset, kArgDotOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,18 +109,11 @@ class TestXdotu {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXdotu(args.n,
- scalar_cpu, args.dot_offset,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.dot_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xnrm2.hpp b/test/routines/level1/xnrm2.hpp
index ce33fe59..cb1ec683 100644
--- a/test/routines/level1/xnrm2.hpp
+++ b/test/routines/level1/xnrm2.hpp
@@ -43,6 +43,8 @@ class TestXnrm2 {
kArgXInc,
kArgXOffset, kArgNrm2Offset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufScalar}; }
+ static std::vector<std::string> BuffersOut() { return {kBufScalar}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -101,15 +103,10 @@ class TestXnrm2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXnrm2(args.n,
- scalar_cpu, args.nrm2_offset,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ buffers_host.scalar, args.nrm2_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xscal.hpp b/test/routines/level1/xscal.hpp
index d89688b4..3e6b9a38 100644
--- a/test/routines/level1/xscal.hpp
+++ b/test/routines/level1/xscal.hpp
@@ -44,6 +44,8 @@ class TestXscal {
kArgXOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -96,12 +98,9 @@ class TestXscal {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXscal(args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level1/xswap.hpp b/test/routines/level1/xswap.hpp
index 49b0d3d0..d9b84dc4 100644
--- a/test/routines/level1/xswap.hpp
+++ b/test/routines/level1/xswap.hpp
@@ -43,6 +43,8 @@ class TestXswap {
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX, kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -101,16 +103,10 @@ class TestXswap {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXswap(args.n,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgbmv.hpp b/test/routines/level2/xgbmv.hpp
index f371b9a7..990ef49f 100644
--- a/test/routines/level2/xgbmv.hpp
+++ b/test/routines/level2/xgbmv.hpp
@@ -45,6 +45,8 @@ class TestXgbmv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -118,20 +120,13 @@ class TestXgbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
args.m, args.n, args.kl, args.ku, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgemv.hpp b/test/routines/level2/xgemv.hpp
index 2442be4c..a007cb62 100644
--- a/test/routines/level2/xgemv.hpp
+++ b/test/routines/level2/xgemv.hpp
@@ -45,6 +45,8 @@ class TestXgemv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -118,20 +120,13 @@ class TestXgemv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgemv(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xger.hpp b/test/routines/level2/xger.hpp
index 3e7ccbc3..5c131e2d 100644
--- a/test/routines/level2/xger.hpp
+++ b/test/routines/level2/xger.hpp
@@ -45,6 +45,8 @@ class TestXger {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -113,19 +115,12 @@ class TestXger {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXger(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgerc.hpp b/test/routines/level2/xgerc.hpp
index d880ae1f..e3544424 100644
--- a/test/routines/level2/xgerc.hpp
+++ b/test/routines/level2/xgerc.hpp
@@ -45,6 +45,8 @@ class TestXgerc {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -113,19 +115,12 @@ class TestXgerc {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgerc(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xgeru.hpp b/test/routines/level2/xgeru.hpp
index 1735e42a..1d81e292 100644
--- a/test/routines/level2/xgeru.hpp
+++ b/test/routines/level2/xgeru.hpp
@@ -45,6 +45,8 @@ class TestXgeru {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -113,19 +115,12 @@ class TestXgeru {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgeru(convertToCBLAS(args.layout),
args.m, args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhbmv.hpp b/test/routines/level2/xhbmv.hpp
index 99538bf1..21194fd6 100644
--- a/test/routines/level2/xhbmv.hpp
+++ b/test/routines/level2/xhbmv.hpp
@@ -45,6 +45,8 @@ class TestXhbmv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXhbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.kl, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhemv.hpp b/test/routines/level2/xhemv.hpp
index 3792cb66..ffef8ff8 100644
--- a/test/routines/level2/xhemv.hpp
+++ b/test/routines/level2/xhemv.hpp
@@ -45,6 +45,8 @@ class TestXhemv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXhemv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhemv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xher.hpp b/test/routines/level2/xher.hpp
index c58eb189..083bd3fc 100644
--- a/test/routines/level2/xher.hpp
+++ b/test/routines/level2/xher.hpp
@@ -45,6 +45,8 @@ class TestXher {
kArgAOffset, kArgXOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<U> &args) {
@@ -106,17 +108,12 @@ class TestXher {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
cblasXher(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xher2.hpp b/test/routines/level2/xher2.hpp
index 8a7eb0b6..7bd890a5 100644
--- a/test/routines/level2/xher2.hpp
+++ b/test/routines/level2/xher2.hpp
@@ -45,6 +45,8 @@ class TestXher2 {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXher2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXher2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpmv.hpp b/test/routines/level2/xhpmv.hpp
index 0862b619..285dd6d3 100644
--- a/test/routines/level2/xhpmv.hpp
+++ b/test/routines/level2/xhpmv.hpp
@@ -45,6 +45,8 @@ class TestXhpmv {
kArgAPOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXhpmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhpmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- ap_mat_cpu, args.ap_offset,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.ap_mat, args.ap_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpr.hpp b/test/routines/level2/xhpr.hpp
index 5b454174..88bae86b 100644
--- a/test/routines/level2/xhpr.hpp
+++ b/test/routines/level2/xhpr.hpp
@@ -45,6 +45,8 @@ class TestXhpr {
kArgAPOffset, kArgXOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatAP}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<U> &args) {
@@ -106,17 +108,12 @@ class TestXhpr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
cblasXhpr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.ap_mat, args.ap_offset);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xhpr2.hpp b/test/routines/level2/xhpr2.hpp
index b770da2e..cd10fa00 100644
--- a/test/routines/level2/xhpr2.hpp
+++ b/test/routines/level2/xhpr2.hpp
@@ -45,6 +45,8 @@ class TestXhpr2 {
kArgAPOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatAP}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXhpr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhpr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.ap_mat, args.ap_offset);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsbmv.hpp b/test/routines/level2/xsbmv.hpp
index 7a836170..5c70aba5 100644
--- a/test/routines/level2/xsbmv.hpp
+++ b/test/routines/level2/xsbmv.hpp
@@ -45,6 +45,8 @@ class TestXsbmv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXsbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.kl, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspmv.hpp b/test/routines/level2/xspmv.hpp
index 352c8cfd..560f5baa 100644
--- a/test/routines/level2/xspmv.hpp
+++ b/test/routines/level2/xspmv.hpp
@@ -45,6 +45,8 @@ class TestXspmv {
kArgAPOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXspmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXspmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- ap_mat_cpu, args.ap_offset,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.ap_mat, args.ap_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspr.hpp b/test/routines/level2/xspr.hpp
index 988bcdc2..2e12db33 100644
--- a/test/routines/level2/xspr.hpp
+++ b/test/routines/level2/xspr.hpp
@@ -45,6 +45,8 @@ class TestXspr {
kArgAPOffset, kArgXOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatAP}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -106,17 +108,12 @@ class TestXspr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXspr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.ap_mat, args.ap_offset);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xspr2.hpp b/test/routines/level2/xspr2.hpp
index ee517bc1..a7e22227 100644
--- a/test/routines/level2/xspr2.hpp
+++ b/test/routines/level2/xspr2.hpp
@@ -45,6 +45,8 @@ class TestXspr2 {
kArgAPOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatAP}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXspr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXspr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- ap_mat_cpu, args.ap_offset);
- buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.ap_mat, args.ap_offset);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsymv.hpp b/test/routines/level2/xsymv.hpp
index 5eecfb74..d9cf9c1e 100644
--- a/test/routines/level2/xsymv.hpp
+++ b/test/routines/level2/xsymv.hpp
@@ -45,6 +45,8 @@ class TestXsymv {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXsymv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsymv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc, args.beta,
- y_vec_cpu, args.y_offset, args.y_inc);
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc, args.beta,
+ buffers_host.y_vec, args.y_offset, args.y_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsyr.hpp b/test/routines/level2/xsyr.hpp
index ac4ee1ff..b60c3a36 100644
--- a/test/routines/level2/xsyr.hpp
+++ b/test/routines/level2/xsyr.hpp
@@ -45,6 +45,8 @@ class TestXsyr {
kArgAOffset, kArgXOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -106,17 +108,12 @@ class TestXsyr {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyr(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xsyr2.hpp b/test/routines/level2/xsyr2.hpp
index 43644883..dd10a3d0 100644
--- a/test/routines/level2/xsyr2.hpp
+++ b/test/routines/level2/xsyr2.hpp
@@ -45,6 +45,8 @@ class TestXsyr2 {
kArgAOffset, kArgXOffset, kArgYOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatA}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -112,20 +114,13 @@ class TestXsyr2 {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyr2(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
args.n, args.alpha,
- x_vec_cpu, args.x_offset, args.x_inc,
- y_vec_cpu, args.y_offset, args.y_inc,
- a_mat_cpu, args.a_offset, args.a_ld);
- buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+ buffers_host.x_vec, args.x_offset, args.x_inc,
+ buffers_host.y_vec, args.y_offset, args.y_inc,
+ buffers_host.a_mat, args.a_offset, args.a_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtbmv.hpp b/test/routines/level2/xtbmv.hpp
index ab9244af..7eb8ce9e 100644
--- a/test/routines/level2/xtbmv.hpp
+++ b/test/routines/level2/xtbmv.hpp
@@ -44,6 +44,8 @@ class TestXtbmv {
kArgALeadDim, kArgXInc,
kArgAOffset, kArgXOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,19 +109,14 @@ class TestXtbmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtbmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.n, args.kl,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtpmv.hpp b/test/routines/level2/xtpmv.hpp
index 3821e1a4..7f4842f0 100644
--- a/test/routines/level2/xtpmv.hpp
+++ b/test/routines/level2/xtpmv.hpp
@@ -44,6 +44,8 @@ class TestXtpmv {
kArgXInc,
kArgAPOffset, kArgXOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatAP, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,19 +109,14 @@ class TestXtpmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> ap_mat_cpu(args.ap_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtpmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.n,
- ap_mat_cpu, args.ap_offset,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers_host.ap_mat, args.ap_offset,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtrmv.hpp b/test/routines/level2/xtrmv.hpp
index 7211c757..cb7527ed 100644
--- a/test/routines/level2/xtrmv.hpp
+++ b/test/routines/level2/xtrmv.hpp
@@ -44,6 +44,8 @@ class TestXtrmv {
kArgALeadDim, kArgXInc,
kArgAOffset, kArgXOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -107,19 +109,14 @@ class TestXtrmv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrmv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.n,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index 78b9672f..63d34758 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -44,6 +44,8 @@ class TestXtrsv {
kArgALeadDim, kArgXInc,
kArgAOffset, kArgXOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufVecX}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecX}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeX(const Arguments<T> &args) {
@@ -122,19 +124,14 @@ class TestXtrsv {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrsv(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.n,
- a_mat_cpu, args.a_offset, args.a_ld,
- x_vec_cpu, args.x_offset, args.x_inc);
- buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.x_vec, args.x_offset, args.x_inc);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index 1b12fb1c..a33cbfec 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -45,6 +45,8 @@ class TestXgemm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXgemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgemm(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.b_transpose),
args.m, args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp
index 76550b15..74029c7e 100644
--- a/test/routines/level3/xhemm.hpp
+++ b/test/routines/level3/xhemm.hpp
@@ -45,6 +45,8 @@ class TestXhemm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXhemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhemm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp
index 5ca3aac6..ea13bbc1 100644
--- a/test/routines/level3/xher2k.hpp
+++ b/test/routines/level3/xher2k.hpp
@@ -45,6 +45,8 @@ class TestXher2k {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<U> &args) {
@@ -121,22 +123,15 @@ class TestXher2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
auto alpha2 = T{args.alpha, args.alpha};
cblasXher2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, alpha2,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index e93d887a..b1ce83e0 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -45,6 +45,8 @@ class TestXherk {
kArgAOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<U> &args) {
@@ -110,18 +112,13 @@ class TestXherk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
cblasXherk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp
index 9d127e26..6ab644b8 100644
--- a/test/routines/level3/xsymm.hpp
+++ b/test/routines/level3/xsymm.hpp
@@ -45,6 +45,8 @@ class TestXsymm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXsymm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsymm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp
index d1bdac56..1400c4e2 100644
--- a/test/routines/level3/xsyr2k.hpp
+++ b/test/routines/level3/xsyr2k.hpp
@@ -45,6 +45,8 @@ class TestXsyr2k {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -119,21 +121,14 @@ class TestXsyr2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyr2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp
index 1330924e..2df8d6b0 100644
--- a/test/routines/level3/xsyrk.hpp
+++ b/test/routines/level3/xsyrk.hpp
@@ -45,6 +45,8 @@ class TestXsyrk {
kArgAOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -110,18 +112,13 @@ class TestXsyrk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyrk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp
index 7c5bd842..84adc6e0 100644
--- a/test/routines/level3/xtrmm.hpp
+++ b/test/routines/level3/xtrmm.hpp
@@ -45,6 +45,8 @@ class TestXtrmm {
kArgAOffset, kArgBOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -112,20 +114,15 @@ class TestXtrmm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrmm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index a70ef03f..de5b307d 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -47,6 +47,8 @@ class TestXtrsm {
kArgAOffset, kArgBOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -124,20 +126,15 @@ class TestXtrsm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrsm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp
index ee15ff92..05141bbb 100644
--- a/test/routines/levelx/xaxpybatched.hpp
+++ b/test/routines/levelx/xaxpybatched.hpp
@@ -45,6 +45,8 @@ class TestXaxpyBatched {
kArgXInc, kArgYInc,
kArgBatchCount, kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY}; }
+ static std::vector<std::string> BuffersOut() { return {kBufVecY}; }
// Helper for the sizes per batch
static size_t PerBatchSizeX(const Arguments<T> &args) { return args.n * args.x_inc; }
@@ -123,17 +125,12 @@ class TestXaxpyBatched {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
- std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
- buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
- buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
for (auto batch = size_t{0}; batch < args.batch_count; ++batch) {
cblasXaxpy(args.n, args.alphas[batch],
- x_vec_cpu, args.x_offsets[batch], args.x_inc,
- y_vec_cpu, args.y_offsets[batch], args.y_inc);
+ buffers_host.x_vec, args.x_offsets[batch], args.x_inc,
+ buffers_host.y_vec, args.y_offsets[batch], args.y_inc);
}
- buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp
index 80a30e4d..ab5f20c5 100644
--- a/test/routines/levelx/xgemmbatched.hpp
+++ b/test/routines/levelx/xgemmbatched.hpp
@@ -45,6 +45,8 @@ class TestXgemmBatched {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgBatchCount, kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Helper for the sizes per batch
static size_t PerBatchSizeA(const Arguments<T> &args) {
@@ -152,23 +154,16 @@ class TestXgemmBatched {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
for (auto batch = size_t{0}; batch < args.batch_count; ++batch) {
cblasXgemm(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.b_transpose),
args.m, args.n, args.k, args.alphas[batch],
- a_mat_cpu, args.a_offsets[batch], args.a_ld,
- b_mat_cpu, args.b_offsets[batch], args.b_ld, args.betas[batch],
- c_mat_cpu, args.c_offsets[batch], args.c_ld);
+ buffers_host.a_mat, args.a_offsets[batch], args.a_ld,
+ buffers_host.b_mat, args.b_offsets[batch], args.b_ld, args.betas[batch],
+ buffers_host.c_mat, args.c_offsets[batch], args.c_ld);
}
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp
index b470dbf3..ffb484b0 100644
--- a/test/routines/levelx/xinvert.hpp
+++ b/test/routines/levelx/xinvert.hpp
@@ -25,17 +25,10 @@ namespace clblast {
// =================================================================================================
template <typename T>
-StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
const bool is_upper = ((args.triangle == Triangle::kUpper && args.layout != Layout::kRowMajor) ||
(args.triangle == Triangle::kLower && args.layout == Layout::kRowMajor));
- // Data transfer from OpenCL to std::vector
- std::vector<T> a_mat_cpu(args.a_size, T{0.0});
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
-
- // Creates the output buffer
- std::vector<T> b_mat_cpu(args.b_size, T{0.0});
-
// Helper variables
const auto block_size = args.m;
const auto num_blocks = CeilDiv(args.n, block_size);
@@ -60,11 +53,11 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
auto a_value = T{1.0};
if (args.diagonal == Diagonal::kNonUnit) {
if (i + block_id * block_size < args.n) {
- if (a_mat_cpu[i * a_ld + i + a_offset] == T{0.0}) { return StatusCode::kUnknownError; }
- a_value = T{1.0} / a_mat_cpu[i * a_ld + i + a_offset];
+ if (buffers_host.a_mat[i * a_ld + i + a_offset] == T{0.0}) { return StatusCode::kUnknownError; }
+ a_value = T{1.0} / buffers_host.a_mat[i * a_ld + i + a_offset];
}
}
- b_mat_cpu[i * b_ld + i + b_offset] = a_value;
+ buffers_host.b_mat[i * b_ld + i + b_offset] = a_value;
}
// Inverts the upper triangle row by row
@@ -75,11 +68,11 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
for (auto k = i + 1; k <= j; ++k) {
auto a_value = T{0.0};
if ((i + block_id * block_size < args.n) && (k + block_id * block_size < args.n)) {
- a_value = a_mat_cpu[k * a_ld + i + a_offset];
+ a_value = buffers_host.a_mat[k * a_ld + i + a_offset];
}
- sum += a_value * b_mat_cpu[j * b_ld + k + b_offset];
+ sum += a_value * buffers_host.b_mat[j * b_ld + k + b_offset];
}
- b_mat_cpu[j * b_ld + i + b_offset] = - sum * b_mat_cpu[i * b_ld + i + b_offset];
+ buffers_host.b_mat[j * b_ld + i + b_offset] = - sum * buffers_host.b_mat[i * b_ld + i + b_offset];
}
}
}
@@ -92,35 +85,32 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
for (auto k = j; k < i; ++k) {
auto a_value = T{0.0};
if ((i + block_id * block_size < args.n) && (k + block_id * block_size < args.n)) {
- a_value = a_mat_cpu[k * a_ld + i + a_offset];
+ a_value = buffers_host.a_mat[k * a_ld + i + a_offset];
}
- sum += a_value * b_mat_cpu[j * b_ld + k + b_offset];
+ sum += a_value * buffers_host.b_mat[j * b_ld + k + b_offset];
}
- b_mat_cpu[j * b_ld + i + b_offset] = - sum * b_mat_cpu[i * b_ld + i + b_offset];
+ buffers_host.b_mat[j * b_ld + i + b_offset] = - sum * buffers_host.b_mat[i * b_ld + i + b_offset];
}
}
}
}
-
- // Data transfer back to OpenCL
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
return StatusCode::kSuccess;
}
// Half-precision version calling the above reference implementation after conversions
template <>
-StatusCode RunReference<half>(const Arguments<half> &args, Buffers<half> &buffers, Queue &queue) {
- auto a_buffer2 = HalfToFloatBuffer(buffers.a_mat, queue());
- auto b_buffer2 = HalfToFloatBuffer(buffers.b_mat, queue());
- auto dummy = clblast::Buffer<float>(0);
- auto buffers2 = Buffers<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
+ auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
+ auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
+ auto dummy = std::vector<float>(0);
+ auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.a_ld = args.a_ld; args2.m = args.m; args2.n = args.n;
args2.a_offset = args.a_offset;
args2.layout = args.layout; args2.triangle = args.triangle; args2.diagonal = args.diagonal;
- auto status = RunReference(args2, buffers2, queue);
- FloatToHalfBuffer(buffers.b_mat, b_buffer2, queue());
+ auto status = RunReference(args2, buffers2);
+ FloatToHalfBuffer(buffers_host.b_mat, b_buffer2);
return status;
}
@@ -140,6 +130,8 @@ class TestXinvert {
kArgLayout, kArgTriangle, kArgDiagonal,
kArgALeadDim, kArgAOffset};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -190,11 +182,15 @@ class TestXinvert {
// Describes how to run a naive version of the routine (for correctness/performance comparison).
// Note that a proper clBLAS or CPU BLAS comparison is not available for non-BLAS routines.
static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ auto buffers_host = BuffersHost<T>();
+ DeviceToHost(args, buffers, buffers_host, queue, BuffersIn());
+ const auto status = RunReference(args, buffers_host);
+ HostToDevice(args, buffers, buffers_host, queue, BuffersOut());
+ return status;
}
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
+ return RunReference(args, buffers_host);
}
// Describes how to download the results of the computation (more importantly: which buffer)
diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp
index d1064d0c..d5973b4c 100644
--- a/test/routines/levelx/xomatcopy.hpp
+++ b/test/routines/levelx/xomatcopy.hpp
@@ -23,13 +23,7 @@ namespace clblast {
// =================================================================================================
template <typename T>
-StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
-
- // Data transfer from OpenCL to std::vector
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
// Checking for invalid arguments
const auto a_rotated = (args.layout == Layout::kRowMajor);
@@ -40,8 +34,8 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
if ((args.m == 0) || (args.n == 0)) { return StatusCode::kInvalidDimension; }
if ((args.a_ld < args.m && !a_rotated) || (args.a_ld < args.n && a_rotated)) { return StatusCode::kInvalidLeadDimA; }
if ((args.b_ld < args.m && !b_rotated) || (args.b_ld < args.n && b_rotated)) { return StatusCode::kInvalidLeadDimB; }
- if (buffers.a_mat.GetSize() < (a_base + args.a_offset) * sizeof(T)) { return StatusCode::kInsufficientMemoryA; }
- if (buffers.b_mat.GetSize() < (b_base + args.b_offset) * sizeof(T)) { return StatusCode::kInsufficientMemoryB; }
+ if (buffers_host.a_mat.size() * sizeof(T) < (a_base + args.a_offset) * sizeof(T)) { return StatusCode::kInsufficientMemoryA; }
+ if (buffers_host.b_mat.size() * sizeof(T) < (b_base + args.b_offset) * sizeof(T)) { return StatusCode::kInsufficientMemoryB; }
// Matrix copy, scaling, and/or transpose
for (auto id1 = size_t{0}; id1 < args.m; ++id1) {
@@ -52,30 +46,27 @@ StatusCode RunReference(const Arguments<T> &args, Buffers<T> &buffers, Queue &qu
const auto b_two = (b_rotated) ? id1 : id2;
const auto a_index = a_two * args.a_ld + a_one + args.a_offset;
const auto b_index = b_two * args.b_ld + b_one + args.b_offset;
- b_mat_cpu[b_index] = args.alpha * a_mat_cpu[a_index];
+ buffers_host.b_mat[b_index] = args.alpha * buffers_host.a_mat[a_index];
}
}
-
- // Data transfer back to OpenCL
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
return StatusCode::kSuccess;
}
// Half-precision version calling the above reference implementation after conversions
template <>
-StatusCode RunReference<half>(const Arguments<half> &args, Buffers<half> &buffers, Queue &queue) {
- auto a_buffer2 = HalfToFloatBuffer(buffers.a_mat, queue());
- auto b_buffer2 = HalfToFloatBuffer(buffers.b_mat, queue());
- auto dummy = clblast::Buffer<float>(0);
- auto buffers2 = Buffers<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
+StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
+ auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
+ auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
+ auto dummy = std::vector<float>(0);
+ auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.a_ld = args.a_ld; args2.b_ld = args.b_ld; args2.m = args.m; args2.n = args.n;
args2.a_offset = args.a_offset; args2.b_offset = args.b_offset;
args2.layout = args.layout; args2.a_transpose = args.a_transpose;
args2.alpha = HalfToFloat(args.alpha);
- auto status = RunReference(args2, buffers2, queue);
- FloatToHalfBuffer(buffers.b_mat, b_buffer2, queue());
+ auto status = RunReference(args2, buffers2);
+ FloatToHalfBuffer(buffers_host.b_mat, b_buffer2);
return status;
}
@@ -97,6 +88,8 @@ class TestXomatcopy {
kArgAOffset, kArgBOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -148,11 +141,15 @@ class TestXomatcopy {
// Describes how to run a naive version of the routine (for correctness/performance comparison).
// Note that a proper clBLAS or CPU BLAS comparison is not available for non-BLAS routines.
static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ auto buffers_host = BuffersHost<T>();
+ DeviceToHost(args, buffers, buffers_host, queue, BuffersIn());
+ const auto status = RunReference(args, buffers_host);
+ HostToDevice(args, buffers, buffers_host, queue, BuffersOut());
+ return status;
}
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- return RunReference(args, buffers, queue);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
+ return RunReference(args, buffers_host);
}
// Describes how to download the results of the computation (more importantly: which buffer)