summaryrefslogtreecommitdiff
path: root/test/routines/level2/xgemv.h
diff options
context:
space:
mode:
Diffstat (limited to 'test/routines/level2/xgemv.h')
-rw-r--r--test/routines/level2/xgemv.h57
1 files changed, 42 insertions, 15 deletions
diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h
index 2924d498..a70ccd34 100644
--- a/test/routines/level2/xgemv.h
+++ b/test/routines/level2/xgemv.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -76,7 +81,7 @@ class TestXgemv {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Gemv(args.layout, args.a_transpose,
@@ -90,19 +95,41 @@ class TestXgemv {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXgemv(static_cast<clblasOrder>(args.layout),
- static_cast<clblasTranspose>(args.a_transpose),
- args.m, args.n, args.alpha,
- buffers.a_mat(), args.a_offset, args.a_ld,
- buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXgemv(static_cast<clblasOrder>(args.layout),
+ static_cast<clblasTranspose>(args.a_transpose),
+ args.m, args.n, args.alpha,
+ buffers.a_mat(), args.a_offset, args.a_ld,
+ buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXgemv(convertToCBLAS(args.layout),
+ convertToCBLAS(args.a_transpose),
+ args.m, args.n, args.alpha,
+ a_mat_cpu, args.a_offset, args.a_ld,
+ x_vec_cpu, args.x_offset, args.x_inc, args.beta,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {