summaryrefslogtreecommitdiff
path: root/test/routines/level1
diff options
context:
space:
mode:
Diffstat (limited to 'test/routines/level1')
-rw-r--r--test/routines/level1/xaxpy.h46
-rw-r--r--test/routines/level1/xcopy.h46
-rw-r--r--test/routines/level1/xdot.h51
-rw-r--r--test/routines/level1/xdotc.h51
-rw-r--r--test/routines/level1/xdotu.h51
-rw-r--r--test/routines/level1/xnrm2.h46
-rw-r--r--test/routines/level1/xscal.h41
-rw-r--r--test/routines/level1/xswap.h47
8 files changed, 281 insertions, 98 deletions
diff --git a/test/routines/level1/xaxpy.h b/test/routines/level1/xaxpy.h
index 50480f46..8f72f570 100644
--- a/test/routines/level1/xaxpy.h
+++ b/test/routines/level1/xaxpy.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -65,7 +70,7 @@ class TestXaxpy {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Axpy(args.n, args.alpha,
@@ -77,16 +82,33 @@ class TestXaxpy {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXaxpy(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXaxpy(args.n, args.alpha,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXaxpy(args.n, args.alpha,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xcopy.h b/test/routines/level1/xcopy.h
index 8d324d88..0527ca6a 100644
--- a/test/routines/level1/xcopy.h
+++ b/test/routines/level1/xcopy.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -64,7 +69,7 @@ class TestXcopy {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Copy<T>(args.n,
@@ -76,16 +81,33 @@ class TestXcopy {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXcopy<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXcopy<T>(args.n,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXcopy(args.n,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xdot.h b/test/routines/level1/xdot.h
index 04669f52..d1c34c0f 100644
--- a/test/routines/level1/xdot.h
+++ b/test/routines/level1/xdot.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -68,7 +73,7 @@ class TestXdot {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dot<T>(args.n,
@@ -81,17 +86,37 @@ class TestXdot {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXdot<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXdot<T>(args.n,
+ buffers.scalar(), args.dot_offset,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXdot(args.n,
+ scalar_cpu, args.dot_offset,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xdotc.h b/test/routines/level1/xdotc.h
index e5b42ef4..a2742cb0 100644
--- a/test/routines/level1/xdotc.h
+++ b/test/routines/level1/xdotc.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -68,7 +73,7 @@ class TestXdotc {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dotc<T>(args.n,
@@ -81,17 +86,37 @@ class TestXdotc {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXdotc<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXdotc<T>(args.n,
+ buffers.scalar(), args.dot_offset,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXdotc(args.n,
+ scalar_cpu, args.dot_offset,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xdotu.h b/test/routines/level1/xdotu.h
index 6430148c..06ce979e 100644
--- a/test/routines/level1/xdotu.h
+++ b/test/routines/level1/xdotu.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -68,7 +73,7 @@ class TestXdotu {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Dotu<T>(args.n,
@@ -81,17 +86,37 @@ class TestXdotu {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXdotu<T>(args.n,
- buffers.scalar(), args.dot_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXdotu<T>(args.n,
+ buffers.scalar(), args.dot_offset,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXdotu(args.n,
+ scalar_cpu, args.dot_offset,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xnrm2.h b/test/routines/level1/xnrm2.h
index e3f77ee4..d8a0de4e 100644
--- a/test/routines/level1/xnrm2.h
+++ b/test/routines/level1/xnrm2.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -64,7 +69,7 @@ class TestXnrm2 {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Nrm2<T>(args.n,
@@ -76,16 +81,33 @@ class TestXnrm2 {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXnrm2<T>(args.n,
- buffers.scalar(), args.nrm2_offset,
- buffers.x_vec(), args.x_offset, args.x_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXnrm2<T>(args.n,
+ buffers.scalar(), args.nrm2_offset,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> scalar_cpu(args.scalar_size, static_cast<T>(0));
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ buffers.scalar.Read(queue, args.scalar_size, scalar_cpu);
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ cblasXnrm2(args.n,
+ scalar_cpu, args.nrm2_offset,
+ x_vec_cpu, args.x_offset, args.x_inc);
+ buffers.scalar.Write(queue, args.scalar_size, scalar_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xscal.h b/test/routines/level1/xscal.h
index d990afcc..35855dbd 100644
--- a/test/routines/level1/xscal.h
+++ b/test/routines/level1/xscal.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -61,7 +66,7 @@ class TestXscal {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Scal(args.n, args.alpha,
@@ -72,15 +77,29 @@ class TestXscal {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXscal(args.n, args.alpha,
- buffers.x_vec(), args.x_offset, args.x_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXscal(args.n, args.alpha,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ cblasXscal(args.n, args.alpha,
+ x_vec_cpu, args.x_offset, args.x_inc);
+ buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
diff --git a/test/routines/level1/xswap.h b/test/routines/level1/xswap.h
index 2096a2c3..ae69d3be 100644
--- a/test/routines/level1/xswap.h
+++ b/test/routines/level1/xswap.h
@@ -19,7 +19,12 @@
#include <vector>
#include <string>
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+ #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+ #include "wrapper_cblas.h"
+#endif
namespace clblast {
// =================================================================================================
@@ -64,7 +69,7 @@ class TestXswap {
static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
// Describes how to run the CLBlast routine
- static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+ static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Swap<T>(args.n,
@@ -76,16 +81,34 @@ class TestXswap {
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
- static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
- auto queue_plain = queue();
- auto event = cl_event{};
- auto status = clblasXswap<T>(args.n,
- buffers.x_vec(), args.x_offset, args.x_inc,
- buffers.y_vec(), args.y_offset, args.y_inc,
- 1, &queue_plain, 0, nullptr, &event);
- clWaitForEvents(1, &event);
- return static_cast<StatusCode>(status);
- }
+ #ifdef CLBLAST_REF_CLBLAS
+ static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ auto queue_plain = queue();
+ auto event = cl_event{};
+ auto status = clblasXswap<T>(args.n,
+ buffers.x_vec(), args.x_offset, args.x_inc,
+ buffers.y_vec(), args.y_offset, args.y_inc,
+ 1, &queue_plain, 0, nullptr, &event);
+ clWaitForEvents(1, &event);
+ return static_cast<StatusCode>(status);
+ }
+ #endif
+
+ // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+ #ifdef CLBLAST_REF_CBLAS
+ static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+ std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+ std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+ buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+ cblasXswap(args.n,
+ x_vec_cpu, args.x_offset, args.x_inc,
+ y_vec_cpu, args.y_offset, args.y_inc);
+ buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+ buffers.y_vec.Write(queue, args.y_size, y_vec_cpu);
+ return StatusCode::kSuccess;
+ }
+ #endif
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {