diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-04-02 11:58:00 -0700 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-04-02 11:58:00 -0700 |
commit | 1a82861a902e17f15486664b340c50530cce6542 (patch) | |
tree | 501c25dd3bcf86525052eb28c2c7ad4c6cb51647 /test/routines/level2/xsyr2.h | |
parent | 5c83217cf256984573924e8f89c46f393a5fcfcd (diff) |
Added support for testing (performance and correctness) against a CPU BLAS library
Diffstat (limited to 'test/routines/level2/xsyr2.h')
-rw-r--r-- | test/routines/level2/xsyr2.h | 57 |
1 files changed, 42 insertions, 15 deletions
```diff
diff --git a/test/routines/level2/xsyr2.h b/test/routines/level2/xsyr2.h
index 32497a61..50bc3cea 100644
--- a/test/routines/level2/xsyr2.h
+++ b/test/routines/level2/xsyr2.h
@@ -19,7 +19,12 @@
 #include <vector>
 #include <string>
 
-#include "wrapper_clblas.h"
+#ifdef CLBLAST_REF_CLBLAS
+  #include "wrapper_clblas.h"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+  #include "wrapper_cblas.h"
+#endif
 
 namespace clblast {
 // =================================================================================================
@@ -70,7 +75,7 @@ class TestXsyr2 {
   static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
 
   // Describes how to run the CLBlast routine
-  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
+  static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
     auto queue_plain = queue();
     auto event = cl_event{};
     auto status = Syr2(args.layout, args.triangle,
@@ -84,19 +89,41 @@ class TestXsyr2 {
   }
 
   // Describes how to run the clBLAS routine (for correctness/performance comparison)
-  static StatusCode RunReference(const Arguments<T> &args, const Buffers<T> &buffers, Queue &queue) {
-    auto queue_plain = queue();
-    auto event = cl_event{};
-    auto status = clblasXsyr2(static_cast<clblasOrder>(args.layout),
-                              static_cast<clblasUplo>(args.triangle),
-                              args.n, args.alpha,
-                              buffers.x_vec(), args.x_offset, args.x_inc,
-                              buffers.y_vec(), args.y_offset, args.y_inc,
-                              buffers.a_mat(), args.a_offset, args.a_ld,
-                              1, &queue_plain, 0, nullptr, &event);
-    clWaitForEvents(1, &event);
-    return static_cast<StatusCode>(status);
-  }
+  #ifdef CLBLAST_REF_CLBLAS
+    static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+      auto queue_plain = queue();
+      auto event = cl_event{};
+      auto status = clblasXsyr2(static_cast<clblasOrder>(args.layout),
+                                static_cast<clblasUplo>(args.triangle),
+                                args.n, args.alpha,
+                                buffers.x_vec(), args.x_offset, args.x_inc,
+                                buffers.y_vec(), args.y_offset, args.y_inc,
+                                buffers.a_mat(), args.a_offset, args.a_ld,
+                                1, &queue_plain, 0, nullptr, &event);
+      clWaitForEvents(1, &event);
+      return static_cast<StatusCode>(status);
+    }
+  #endif
+
+  // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+  #ifdef CLBLAST_REF_CBLAS
+    static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+      std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
+      std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+      std::vector<T> y_vec_cpu(args.y_size, static_cast<T>(0));
+      buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
+      buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+      buffers.y_vec.Read(queue, args.y_size, y_vec_cpu);
+      cblasXsyr2(convertToCBLAS(args.layout),
+                 convertToCBLAS(args.triangle),
+                 args.n, args.alpha,
+                 x_vec_cpu, args.x_offset, args.x_inc,
+                 y_vec_cpu, args.y_offset, args.y_inc,
+                 a_mat_cpu, args.a_offset, args.a_ld);
+      buffers.a_mat.Write(queue, args.a_size, a_mat_cpu);
+      return StatusCode::kSuccess;
+    }
+  #endif
 
   // Describes how to download the results of the computation (more importantly: which buffer)
   static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
```