From 1a82861a902e17f15486664b340c50530cce6542 Mon Sep 17 00:00:00 2001 From: cnugteren Date: Sat, 2 Apr 2016 11:58:00 -0700 Subject: Added support for testing (performance and correctness) against a CPU BLAS library --- test/routines/level1/xaxpy.h | 46 ++++++++++++++++++++++--------- test/routines/level1/xcopy.h | 46 ++++++++++++++++++++++--------- test/routines/level1/xdot.h | 51 ++++++++++++++++++++++++++--------- test/routines/level1/xdotc.h | 51 ++++++++++++++++++++++++++--------- test/routines/level1/xdotu.h | 51 ++++++++++++++++++++++++++--------- test/routines/level1/xnrm2.h | 46 ++++++++++++++++++++++--------- test/routines/level1/xscal.h | 41 ++++++++++++++++++++-------- test/routines/level1/xswap.h | 47 +++++++++++++++++++++++--------- test/routines/level2/xgbmv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xgemv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xger.h | 54 +++++++++++++++++++++++++++---------- test/routines/level2/xgerc.h | 54 +++++++++++++++++++++++++++---------- test/routines/level2/xgeru.h | 54 +++++++++++++++++++++++++++---------- test/routines/level2/xhbmv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xhemv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xher.h | 52 +++++++++++++++++++++++++---------- test/routines/level2/xher2.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xhpmv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xhpr.h | 52 +++++++++++++++++++++++++---------- test/routines/level2/xhpr2.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xsbmv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xspmv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xspr.h | 52 +++++++++++++++++++++++++---------- test/routines/level2/xspr2.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xsymv.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xsyr.h | 52 +++++++++++++++++++++++++---------- test/routines/level2/xsyr2.h | 57 ++++++++++++++++++++++++++++----------- test/routines/level2/xtbmv.h | 58 ++++++++++++++++++++++++++++----------- test/routines/level2/xtpmv.h | 58 ++++++++++++++++++++++++++++----------- test/routines/level2/xtrmv.h | 58 ++++++++++++++++++++++++++++----------- test/routines/level3/xgemm.h | 60 ++++++++++++++++++++++++++++++----------- test/routines/level3/xhemm.h | 60 ++++++++++++++++++++++++++++++----------- test/routines/level3/xher2k.h | 63 +++++++++++++++++++++++++++++++------------ test/routines/level3/xherk.h | 55 ++++++++++++++++++++++++++----------- test/routines/level3/xsymm.h | 60 ++++++++++++++++++++++++++++++----------- test/routines/level3/xsyr2k.h | 60 ++++++++++++++++++++++++++++++----------- test/routines/level3/xsyrk.h | 55 ++++++++++++++++++++++++++----------- test/routines/level3/xtrmm.h | 61 +++++++++++++++++++++++++++++------------ 38 files changed, 1529 insertions(+), 552 deletions(-) (limited to 'test/routines') diff --git a/test/routines/level1/xaxpy.h b/test/routines/level1/xaxpy.h index 50480f46..8f72f570 100644 --- a/test/routines/level1/xaxpy.h +++ b/test/routines/level1/xaxpy.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -65,7 +70,7 @@ class TestXaxpy { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Axpy(args.n, args.alpha, @@ -77,16 +82,33 @@ class TestXaxpy { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXaxpy(args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXaxpy(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXaxpy(args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xcopy.h b/test/routines/level1/xcopy.h index 8d324d88..0527ca6a 100644 --- a/test/routines/level1/xcopy.h +++ b/test/routines/level1/xcopy.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -64,7 +69,7 @@ class TestXcopy { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Copy(args.n, @@ -76,16 +81,33 @@ class TestXcopy { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXcopy(args.n, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXcopy(args.n, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXcopy(args.n, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xdot.h b/test/routines/level1/xdot.h index 04669f52..d1c34c0f 100644 --- a/test/routines/level1/xdot.h +++ b/test/routines/level1/xdot.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -68,7 +73,7 @@ class TestXdot { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Dot(args.n, @@ -81,17 +86,37 @@ class TestXdot { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXdot(args.n, - buffers.scalar(), args.dot_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXdot(args.n, + buffers.scalar(), args.dot_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector scalar_cpu(args.scalar_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXdot(args.n, + scalar_cpu, args.dot_offset, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xdotc.h b/test/routines/level1/xdotc.h index e5b42ef4..a2742cb0 100644 --- a/test/routines/level1/xdotc.h +++ b/test/routines/level1/xdotc.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -68,7 +73,7 @@ class TestXdotc { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Dotc(args.n, @@ -81,17 +86,37 @@ class TestXdotc { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXdotc(args.n, - buffers.scalar(), args.dot_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXdotc(args.n, + buffers.scalar(), args.dot_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector scalar_cpu(args.scalar_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXdotc(args.n, + scalar_cpu, args.dot_offset, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xdotu.h b/test/routines/level1/xdotu.h index 6430148c..06ce979e 100644 --- a/test/routines/level1/xdotu.h +++ b/test/routines/level1/xdotu.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -68,7 +73,7 @@ class TestXdotu { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Dotu(args.n, @@ -81,17 +86,37 @@ class TestXdotu { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXdotu(args.n, - buffers.scalar(), args.dot_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXdotu(args.n, + buffers.scalar(), args.dot_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector scalar_cpu(args.scalar_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXdotu(args.n, + scalar_cpu, args.dot_offset, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xnrm2.h b/test/routines/level1/xnrm2.h index e3f77ee4..d8a0de4e 100644 --- a/test/routines/level1/xnrm2.h +++ b/test/routines/level1/xnrm2.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -64,7 +69,7 @@ class TestXnrm2 { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Nrm2(args.n, @@ -76,16 +81,33 @@ class TestXnrm2 { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXnrm2(args.n, - buffers.scalar(), args.nrm2_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXnrm2(args.n, + buffers.scalar(), args.nrm2_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector scalar_cpu(args.scalar_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.scalar.Read(queue, args.scalar_size, scalar_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXnrm2(args.n, + scalar_cpu, args.nrm2_offset, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.scalar.Write(queue, args.scalar_size, scalar_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xscal.h b/test/routines/level1/xscal.h index d990afcc..35855dbd 100644 --- a/test/routines/level1/xscal.h +++ b/test/routines/level1/xscal.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -61,7 +66,7 @@ class TestXscal { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Scal(args.n, args.alpha, @@ -72,15 +77,29 @@ class TestXscal { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXscal(args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXscal(args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXscal(args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level1/xswap.h b/test/routines/level1/xswap.h index 2096a2c3..ae69d3be 100644 --- a/test/routines/level1/xswap.h +++ b/test/routines/level1/xswap.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -64,7 +69,7 @@ class TestXswap { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Swap(args.n, @@ -76,16 +81,34 @@ class TestXswap { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXswap(args.n, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXswap(args.n, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXswap(args.n, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xgbmv.h b/test/routines/level2/xgbmv.h index 0e238804..b875075d 100644 --- a/test/routines/level2/xgbmv.h +++ b/test/routines/level2/xgbmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -76,7 +81,7 @@ class TestXgbmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gbmv(args.layout, args.a_transpose, @@ -90,19 +95,41 @@ class TestXgbmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXgbmv(static_cast(args.layout), - static_cast(args.a_transpose), - args.m, args.n, args.kl, args.ku, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgbmv(static_cast(args.layout), + static_cast(args.a_transpose), + args.m, args.n, args.kl, args.ku, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXgbmv(convertToCBLAS(args.layout), + convertToCBLAS(args.a_transpose), + args.m, args.n, args.kl, args.ku, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xgemv.h b/test/routines/level2/xgemv.h index 2924d498..a70ccd34 100644 --- a/test/routines/level2/xgemv.h +++ b/test/routines/level2/xgemv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -76,7 +81,7 @@ class TestXgemv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemv(args.layout, args.a_transpose, @@ -90,19 +95,41 @@ class TestXgemv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXgemv(static_cast(args.layout), - static_cast(args.a_transpose), - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgemv(static_cast(args.layout), + static_cast(args.a_transpose), + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXgemv(convertToCBLAS(args.layout), + convertToCBLAS(args.a_transpose), + args.m, args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xger.h b/test/routines/level2/xger.h index 98296e92..32c2a505 100644 --- a/test/routines/level2/xger.h +++ b/test/routines/level2/xger.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -72,7 +77,7 @@ class TestXger { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Ger(args.layout, @@ -86,18 +91,39 @@ class TestXger { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXger(static_cast(args.layout), - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXger(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXger(convertToCBLAS(args.layout), + args.m, args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xgerc.h b/test/routines/level2/xgerc.h index 77258d92..4b6954f6 100644 --- a/test/routines/level2/xgerc.h +++ b/test/routines/level2/xgerc.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -72,7 +77,7 @@ class TestXgerc { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gerc(args.layout, @@ -86,18 +91,39 @@ class TestXgerc { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXgerc(static_cast(args.layout), - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgerc(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXgerc(convertToCBLAS(args.layout), + args.m, args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xgeru.h b/test/routines/level2/xgeru.h index e5f5f235..295e69e5 100644 --- a/test/routines/level2/xgeru.h +++ b/test/routines/level2/xgeru.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -72,7 +77,7 @@ class TestXgeru { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Geru(args.layout, @@ -86,18 +91,39 @@ class TestXgeru { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXgeru(static_cast(args.layout), - args.m, args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgeru(static_cast(args.layout), + args.m, args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXgeru(convertToCBLAS(args.layout), + args.m, args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xhbmv.h b/test/routines/level2/xhbmv.h index 34e1502f..e0bdc4da 100644 --- a/test/routines/level2/xhbmv.h +++ b/test/routines/level2/xhbmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXhbmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hbmv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXhbmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhbmv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.kl, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhbmv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXhbmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.kl, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xhemv.h b/test/routines/level2/xhemv.h index 80e22157..fa242961 100644 --- a/test/routines/level2/xhemv.h +++ b/test/routines/level2/xhemv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXhemv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hemv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXhemv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhemv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhemv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXhemv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xher.h b/test/routines/level2/xher.h index 53c4200f..7d0e8cc3 100644 --- a/test/routines/level2/xher.h +++ b/test/routines/level2/xher.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -66,7 +71,7 @@ class TestXher { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Her(args.layout, args.triangle, @@ -79,18 +84,37 @@ class TestXher { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXher(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXher(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXher(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xher2.h b/test/routines/level2/xher2.h index c12ff827..445bba74 100644 --- a/test/routines/level2/xher2.h +++ b/test/routines/level2/xher2.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXher2 { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Her2(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXher2 { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXher2(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXher2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXher2(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xhpmv.h b/test/routines/level2/xhpmv.h index 8fd85b62..406e564f 100644 --- a/test/routines/level2/xhpmv.h +++ b/test/routines/level2/xhpmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXhpmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hpmv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXhpmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhpmv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhpmv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXhpmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + ap_mat_cpu, args.ap_offset, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xhpr.h b/test/routines/level2/xhpr.h index 03599ddc..6f56d3f3 100644 --- a/test/routines/level2/xhpr.h +++ b/test/routines/level2/xhpr.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -66,7 +71,7 @@ class TestXhpr { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hpr(args.layout, args.triangle, @@ -79,18 +84,37 @@ class TestXhpr { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhpr(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.ap_mat(), args.ap_offset, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhpr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXhpr(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + ap_mat_cpu, args.ap_offset); + buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xhpr2.h b/test/routines/level2/xhpr2.h index 68fbc76d..43889cb9 100644 --- a/test/routines/level2/xhpr2.h +++ b/test/routines/level2/xhpr2.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXhpr2 { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hpr2(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXhpr2 { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhpr2(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.ap_mat(), args.ap_offset, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhpr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXhpr2(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + ap_mat_cpu, args.ap_offset); + buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xsbmv.h b/test/routines/level2/xsbmv.h index 5bc17e49..9a5c5140 100644 --- a/test/routines/level2/xsbmv.h +++ b/test/routines/level2/xsbmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXsbmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Sbmv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXsbmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsbmv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.kl, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsbmv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.kl, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXsbmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.kl, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xspmv.h b/test/routines/level2/xspmv.h index e335da42..913af0cd 100644 --- a/test/routines/level2/xspmv.h +++ b/test/routines/level2/xspmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXspmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Spmv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXspmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXspmv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXspmv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXspmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + ap_mat_cpu, args.ap_offset, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xspr.h b/test/routines/level2/xspr.h index 819b1ca8..bab5c541 100644 --- a/test/routines/level2/xspr.h +++ b/test/routines/level2/xspr.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -66,7 +71,7 @@ class TestXspr { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Spr(args.layout, args.triangle, @@ -79,18 +84,37 @@ class TestXspr { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXspr(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.ap_mat(), args.ap_offset, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXspr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXspr(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + ap_mat_cpu, args.ap_offset); + buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xspr2.h b/test/routines/level2/xspr2.h index 43d66c9e..41a04cc0 100644 --- a/test/routines/level2/xspr2.h +++ b/test/routines/level2/xspr2.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXspr2 { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Spr2(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXspr2 { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXspr2(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.ap_mat(), args.ap_offset, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXspr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.ap_mat(), args.ap_offset, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXspr2(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + ap_mat_cpu, args.ap_offset); + buffers.ap_mat.Write(queue, args.ap_size, ap_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xsymv.h b/test/routines/level2/xsymv.h index 13473a3e..0576bc1f 100644 --- a/test/routines/level2/xsymv.h +++ b/test/routines/level2/xsymv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXsymv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Symv(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXsymv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsymv(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, args.beta, - buffers.y_vec(), args.y_offset, args.y_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsymv(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, args.beta, + buffers.y_vec(), args.y_offset, args.y_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXsymv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc, args.beta, + y_vec_cpu, args.y_offset, args.y_inc); + buffers.y_vec.Write(queue, args.y_size, y_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xsyr.h b/test/routines/level2/xsyr.h index 66b75c0c..062eea5a 100644 --- a/test/routines/level2/xsyr.h +++ b/test/routines/level2/xsyr.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -66,7 +71,7 @@ class TestXsyr { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syr(args.layout, args.triangle, @@ -79,18 +84,37 @@ class TestXsyr { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsyr(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyr(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXsyr(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xsyr2.h b/test/routines/level2/xsyr2.h index 32497a61..50bc3cea 100644 --- a/test/routines/level2/xsyr2.h +++ b/test/routines/level2/xsyr2.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -70,7 +75,7 @@ class TestXsyr2 { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syr2(args.layout, args.triangle, @@ -84,19 +89,41 @@ class TestXsyr2 { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsyr2(static_cast(args.layout), - static_cast(args.triangle), - args.n, args.alpha, - buffers.x_vec(), args.x_offset, args.x_inc, - buffers.y_vec(), args.y_offset, args.y_inc, - buffers.a_mat(), args.a_offset, args.a_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyr2(static_cast(args.layout), + static_cast(args.triangle), + args.n, args.alpha, + buffers.x_vec(), args.x_offset, args.x_inc, + buffers.y_vec(), args.y_offset, args.y_inc, + buffers.a_mat(), args.a_offset, args.a_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + std::vector y_vec_cpu(args.y_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + buffers.y_vec.Read(queue, args.y_size, y_vec_cpu); + cblasXsyr2(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + args.n, args.alpha, + x_vec_cpu, args.x_offset, args.x_inc, + y_vec_cpu, args.y_offset, args.y_inc, + a_mat_cpu, args.a_offset, args.a_ld); + buffers.a_mat.Write(queue, args.a_size, a_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xtbmv.h b/test/routines/level2/xtbmv.h index dbdddb65..600b4131 100644 --- a/test/routines/level2/xtbmv.h +++ b/test/routines/level2/xtbmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -65,7 +70,7 @@ class TestXtbmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Tbmv(args.layout, args.triangle, args.a_transpose, args.diagonal, @@ -78,20 +83,41 @@ class TestXtbmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXtbmv(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - static_cast(args.diagonal), - args.n, args.kl, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXtbmv(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + static_cast(args.diagonal), + args.n, args.kl, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXtbmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.diagonal), + args.n, args.kl, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xtpmv.h b/test/routines/level2/xtpmv.h index 4425765e..fc0cf393 100644 --- a/test/routines/level2/xtpmv.h +++ b/test/routines/level2/xtpmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -65,7 +70,7 @@ class TestXtpmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Tpmv(args.layout, args.triangle, args.a_transpose, args.diagonal, @@ -78,20 +83,41 @@ class TestXtpmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXtpmv(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - static_cast(args.diagonal), - args.n, - buffers.ap_mat(), args.ap_offset, - buffers.x_vec(), args.x_offset, args.x_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXtpmv(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + static_cast(args.diagonal), + args.n, + buffers.ap_mat(), args.ap_offset, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector ap_mat_cpu(args.ap_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.ap_mat.Read(queue, args.ap_size, ap_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXtpmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.diagonal), + args.n, + ap_mat_cpu, args.ap_offset, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level2/xtrmv.h b/test/routines/level2/xtrmv.h index 1c0c6fd8..fec72124 100644 --- a/test/routines/level2/xtrmv.h +++ b/test/routines/level2/xtrmv.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -65,7 +70,7 @@ class TestXtrmv { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Trmv(args.layout, args.triangle, args.a_transpose, args.diagonal, @@ -78,20 +83,41 @@ class TestXtrmv { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXtrmv(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - static_cast(args.diagonal), - args.n, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.x_vec(), args.x_offset, args.x_inc, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXtrmv(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + static_cast(args.diagonal), + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector x_vec_cpu(args.x_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXtrmv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.diagonal), + args.n, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xgemm.h b/test/routines/level3/xgemm.h index 695b58b7..49a92936 100644 --- a/test/routines/level3/xgemm.h +++ b/test/routines/level3/xgemm.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -78,7 +83,7 @@ class TestXgemm { static Transposes GetBTransposes(const Transposes &all) { return all; } // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Gemm(args.layout, args.a_transpose, args.b_transpose, @@ -92,20 +97,43 @@ class TestXgemm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXgemm(static_cast(args.layout), - static_cast(args.a_transpose), - static_cast(args.b_transpose), - args.m, args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXgemm(static_cast(args.layout), + static_cast(args.a_transpose), + static_cast(args.b_transpose), + args.m, args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXgemm(convertToCBLAS(args.layout), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.b_transpose), + args.m, args.n, args.k, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xhemm.h b/test/routines/level3/xhemm.h index 7b7134e5..40538417 100644 --- a/test/routines/level3/xhemm.h +++ b/test/routines/level3/xhemm.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -78,7 +83,7 @@ class TestXhemm { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Hemm(args.layout, args.side, args.triangle, @@ -92,20 +97,43 @@ class TestXhemm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXhemm(static_cast(args.layout), - static_cast(args.side), - static_cast(args.triangle), - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXhemm(static_cast(args.layout), + static_cast(args.side), + static_cast(args.triangle), + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXhemm(convertToCBLAS(args.layout), + convertToCBLAS(args.side), + convertToCBLAS(args.triangle), + args.m, args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xher2k.h b/test/routines/level3/xher2k.h index a7fbfcbe..1ea2ad36 100644 --- a/test/routines/level3/xher2k.h +++ b/test/routines/level3/xher2k.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -76,7 +81,7 @@ class TestXher2k { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto alpha2 = T{args.alpha, args.alpha}; @@ -91,21 +96,45 @@ class TestXher2k { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto alpha2 = T{args.alpha, args.alpha}; - auto status = clblasXher2k(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - args.n, args.k, alpha2, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto alpha2 = T{args.alpha, args.alpha}; + auto status = clblasXher2k(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + args.n, args.k, alpha2, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + auto alpha2 = T{args.alpha, args.alpha}; + cblasXher2k(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + args.n, args.k, alpha2, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xherk.h b/test/routines/level3/xherk.h index f097672f..75a7c405 100644 --- a/test/routines/level3/xherk.h +++ b/test/routines/level3/xherk.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -69,7 +74,7 @@ class TestXherk { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Herk(args.layout, args.triangle, args.a_transpose, @@ -82,19 +87,39 @@ class TestXherk { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXherk(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXherk(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXherk(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + args.n, args.k, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xsymm.h b/test/routines/level3/xsymm.h index 03cf5de9..f867c238 100644 --- a/test/routines/level3/xsymm.h +++ b/test/routines/level3/xsymm.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -78,7 +83,7 @@ class TestXsymm { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Symm(args.layout, args.side, args.triangle, @@ -92,20 +97,43 @@ class TestXsymm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsymm(static_cast(args.layout), - static_cast(args.side), - static_cast(args.triangle), - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsymm(static_cast(args.layout), + static_cast(args.side), + static_cast(args.triangle), + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXsymm(convertToCBLAS(args.layout), + convertToCBLAS(args.side), + convertToCBLAS(args.triangle), + args.m, args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xsyr2k.h b/test/routines/level3/xsyr2k.h index 89e77f83..be4e1851 100644 --- a/test/routines/level3/xsyr2k.h +++ b/test/routines/level3/xsyr2k.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -76,7 +81,7 @@ class TestXsyr2k { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syr2k(args.layout, args.triangle, args.a_transpose, @@ -90,20 +95,43 @@ class TestXsyr2k { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsyr2k(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyr2k(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXsyr2k(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + args.n, args.k, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xsyrk.h b/test/routines/level3/xsyrk.h index 8dacb5b3..7675e2aa 100644 --- a/test/routines/level3/xsyrk.h +++ b/test/routines/level3/xsyrk.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -69,7 +74,7 @@ class TestXsyrk { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syrk(args.layout, args.triangle, args.a_transpose, @@ -82,19 +87,39 @@ class TestXsyrk { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsyrk(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyrk(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXsyrk(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + args.n, args.k, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { diff --git a/test/routines/level3/xtrmm.h b/test/routines/level3/xtrmm.h index 152cdf58..a085cb15 100644 --- a/test/routines/level3/xtrmm.h +++ b/test/routines/level3/xtrmm.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -69,7 +74,7 @@ class TestXtrmm { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Trmm(args.layout, args.side, args.triangle, args.a_transpose, args.diagonal, @@ -82,21 +87,43 @@ class TestXtrmm { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXtrmm(static_cast(args.layout), - static_cast(args.side), - static_cast(args.triangle), - static_cast(args.a_transpose), - static_cast(args.diagonal), - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXtrmm(static_cast(args.layout), + static_cast(args.side), + static_cast(args.triangle), + static_cast(args.a_transpose), + static_cast(args.diagonal), + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector b_mat_cpu(args.b_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.b_mat.Read(queue, args.b_size, b_mat_cpu); + cblasXtrmm(convertToCBLAS(args.layout), + convertToCBLAS(args.side), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.diagonal), + args.m, args.n, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, + b_mat_cpu, args.b_offset, args.b_ld); + buffers.b_mat.Write(queue, args.b_size, b_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { -- cgit v1.2.3