summary | refs | log | tree | commit | diff
path: root/test/routines/level3
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2017-04-01 13:36:24 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2017-04-01 13:36:24 +0200
commit b84d2296b87ac212474af855d916b12adf96bdb7 (patch)
tree 0f2e85e1e1acef1d22f046499dd0b8a30e5da4f9 /test/routines/level3
parent a98c00a2671b8981579f3a73dca8fb3365a95e53 (diff)
Separated host-device and device-host memory copies from execution of the CBLAS reference code; for fair timing and code de-duplication
Diffstat (limited to 'test/routines/level3')
-rw-r--r-- test/routines/level3/xgemm.hpp 17
-rw-r--r-- test/routines/level3/xhemm.hpp 17
-rw-r--r-- test/routines/level3/xher2k.hpp 17
-rw-r--r-- test/routines/level3/xherk.hpp 13
-rw-r--r-- test/routines/level3/xsymm.hpp 17
-rw-r--r-- test/routines/level3/xsyr2k.hpp 17
-rw-r--r-- test/routines/level3/xsyrk.hpp 13
-rw-r--r-- test/routines/level3/xtrmm.hpp 13
-rw-r--r-- test/routines/level3/xtrsm.hpp 13
9 files changed, 50 insertions, 87 deletions
diff --git a/test/routines/level3/xgemm.hpp b/test/routines/level3/xgemm.hpp
index 1b12fb1c..a33cbfec 100644
--- a/test/routines/level3/xgemm.hpp
+++ b/test/routines/level3/xgemm.hpp
@@ -45,6 +45,8 @@ class TestXgemm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXgemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXgemm(convertToCBLAS(args.layout),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.b_transpose),
args.m, args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xhemm.hpp b/test/routines/level3/xhemm.hpp
index 76550b15..74029c7e 100644
--- a/test/routines/level3/xhemm.hpp
+++ b/test/routines/level3/xhemm.hpp
@@ -45,6 +45,8 @@ class TestXhemm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXhemm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXhemm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xher2k.hpp b/test/routines/level3/xher2k.hpp
index 5ca3aac6..ea13bbc1 100644
--- a/test/routines/level3/xher2k.hpp
+++ b/test/routines/level3/xher2k.hpp
@@ -45,6 +45,8 @@ class TestXher2k {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<U> &args) {
@@ -121,22 +123,15 @@ class TestXher2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
auto alpha2 = T{args.alpha, args.alpha};
cblasXher2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, alpha2,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index e93d887a..b1ce83e0 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -45,6 +45,8 @@ class TestXherk {
kArgAOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<U> &args) {
@@ -110,18 +112,13 @@ class TestXherk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<U> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<U> &args, BuffersHost<T> &buffers_host, Queue&) {
cblasXherk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsymm.hpp b/test/routines/level3/xsymm.hpp
index 9d127e26..6ab644b8 100644
--- a/test/routines/level3/xsymm.hpp
+++ b/test/routines/level3/xsymm.hpp
@@ -45,6 +45,8 @@ class TestXsymm {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -121,21 +123,14 @@ class TestXsymm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsymm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyr2k.hpp b/test/routines/level3/xsyr2k.hpp
index d1bdac56..1400c4e2 100644
--- a/test/routines/level3/xsyr2k.hpp
+++ b/test/routines/level3/xsyr2k.hpp
@@ -45,6 +45,8 @@ class TestXsyr2k {
kArgAOffset, kArgBOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -119,21 +121,14 @@ class TestXsyr2k {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyr2k(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xsyrk.hpp b/test/routines/level3/xsyrk.hpp
index 1330924e..2df8d6b0 100644
--- a/test/routines/level3/xsyrk.hpp
+++ b/test/routines/level3/xsyrk.hpp
@@ -45,6 +45,8 @@ class TestXsyrk {
kArgAOffset, kArgCOffset,
kArgAlpha, kArgBeta};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatC}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -110,18 +112,13 @@ class TestXsyrk {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> c_mat_cpu(args.c_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXsyrk(convertToCBLAS(args.layout),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
args.n, args.k, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld, args.beta,
- c_mat_cpu, args.c_offset, args.c_ld);
- buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld, args.beta,
+ buffers_host.c_mat, args.c_offset, args.c_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrmm.hpp b/test/routines/level3/xtrmm.hpp
index 7c5bd842..84adc6e0 100644
--- a/test/routines/level3/xtrmm.hpp
+++ b/test/routines/level3/xtrmm.hpp
@@ -45,6 +45,8 @@ class TestXtrmm {
kArgAOffset, kArgBOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -112,20 +114,15 @@ class TestXtrmm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrmm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld);
return StatusCode::kSuccess;
}
#endif
diff --git a/test/routines/level3/xtrsm.hpp b/test/routines/level3/xtrsm.hpp
index a70ef03f..de5b307d 100644
--- a/test/routines/level3/xtrsm.hpp
+++ b/test/routines/level3/xtrsm.hpp
@@ -47,6 +47,8 @@ class TestXtrsm {
kArgAOffset, kArgBOffset,
kArgAlpha};
}
+ static std::vector<std::string> BuffersIn() { return {kBufMatA, kBufMatB}; }
+ static std::vector<std::string> BuffersOut() { return {kBufMatB}; }
// Describes how to obtain the sizes of the buffers
static size_t GetSizeA(const Arguments<T> &args) {
@@ -124,20 +126,15 @@ class TestXtrsm {
// Describes how to run the CPU BLAS routine (for correctness/performance comparison)
#ifdef CLBLAST_REF_CBLAS
- static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
- std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
- std::vector<T> b_mat_cpu(args.b_size, static_cast<T>(0));
- buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
- buffers.b_mat.Read(queue, args.b_size, b_mat_cpu);
+ static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue &) {
cblasXtrsm(convertToCBLAS(args.layout),
convertToCBLAS(args.side),
convertToCBLAS(args.triangle),
convertToCBLAS(args.a_transpose),
convertToCBLAS(args.diagonal),
args.m, args.n, args.alpha,
- a_mat_cpu, args.a_offset, args.a_ld,
- b_mat_cpu, args.b_offset, args.b_ld);
- buffers.b_mat.Write(queue, args.b_size, b_mat_cpu);
+ buffers_host.a_mat, args.a_offset, args.a_ld,
+ buffers_host.b_mat, args.b_offset, args.b_ld);
return StatusCode::kSuccess;
}
#endif