From 1a82861a902e17f15486664b340c50530cce6542 Mon Sep 17 00:00:00 2001 From: cnugteren Date: Sat, 2 Apr 2016 11:58:00 -0700 Subject: Added support for testing (performance and correctness) against a CPU BLAS library --- test/routines/level3/xsyrk.h | 55 ++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 15 deletions(-) (limited to 'test/routines/level3/xsyrk.h') diff --git a/test/routines/level3/xsyrk.h b/test/routines/level3/xsyrk.h index 8dacb5b3..7675e2aa 100644 --- a/test/routines/level3/xsyrk.h +++ b/test/routines/level3/xsyrk.h @@ -19,7 +19,12 @@ #include #include -#include "wrapper_clblas.h" +#ifdef CLBLAST_REF_CLBLAS + #include "wrapper_clblas.h" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "wrapper_cblas.h" +#endif namespace clblast { // ================================================================================================= @@ -69,7 +74,7 @@ class TestXsyrk { static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine // Describes how to run the CLBlast routine - static StatusCode RunRoutine(const Arguments &args, const Buffers &buffers, Queue &queue) { + static StatusCode RunRoutine(const Arguments &args, Buffers &buffers, Queue &queue) { auto queue_plain = queue(); auto event = cl_event{}; auto status = Syrk(args.layout, args.triangle, args.a_transpose, @@ -82,19 +87,39 @@ class TestXsyrk { } // Describes how to run the clBLAS routine (for correctness/performance comparison) - static StatusCode RunReference(const Arguments &args, const Buffers &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = clblasXsyrk(static_cast(args.layout), - static_cast(args.triangle), - static_cast(args.a_transpose), - args.n, args.k, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, args.beta, - buffers.c_mat(), args.c_offset, args.c_ld, - 1, &queue_plain, 0, nullptr, &event); - clWaitForEvents(1, &event); - return static_cast(status); - } + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments &args, Buffers &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyrk(static_cast(args.layout), + static_cast(args.triangle), + static_cast(args.a_transpose), + args.n, args.k, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, args.beta, + buffers.c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) { + std::vector a_mat_cpu(args.a_size, static_cast(0)); + std::vector c_mat_cpu(args.c_size, static_cast(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.c_mat.Read(queue, args.c_size, c_mat_cpu); + cblasXsyrk(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + args.n, args.k, args.alpha, + a_mat_cpu, args.a_offset, args.a_ld, args.beta, + c_mat_cpu, args.c_offset, args.c_ld); + buffers.c_mat.Write(queue, args.c_size, c_mat_cpu); + return StatusCode::kSuccess; + } + #endif // Describes how to download the results of the computation (more importantly: which buffer) static std::vector DownloadResult(const Arguments &args, Buffers &buffers, Queue &queue) { -- cgit v1.2.3