From b84d2296b87ac212474af855d916b12adf96bdb7 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sat, 1 Apr 2017 13:36:24 +0200
Subject: Separated host-device and device-host memory copies from execution of the CBLAS reference code; for fair timing and code de-duplication

---
 test/routines/level3/xherk.hpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'test/routines/level3/xherk.hpp')

diff --git a/test/routines/level3/xherk.hpp b/test/routines/level3/xherk.hpp
index e93d887a..b1ce83e0 100644
--- a/test/routines/level3/xherk.hpp
+++ b/test/routines/level3/xherk.hpp
@@ -45,6 +45,8 @@ class TestXherk {
             kArgAOffset, kArgCOffset,
             kArgAlpha, kArgBeta};
   }
+  static std::vector BuffersIn() { return {kBufMatA, kBufMatC}; }
+  static std::vector BuffersOut() { return {kBufMatC}; }
 
   // Describes how to obtain the sizes of the buffers
   static size_t GetSizeA(const Arguments &args) {
@@ -110,18 +112,13 @@ class TestXherk {
 
   // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
   #ifdef CLBLAST_REF_CBLAS
-    static StatusCode RunReference2(const Arguments &args, Buffers &buffers, Queue &queue) {
-      std::vector a_mat_cpu(args.a_size, static_cast(0));
-      std::vector c_mat_cpu(args.c_size, static_cast(0));
-      buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
-      buffers.c_mat.Read(queue, args.c_size, c_mat_cpu);
+    static StatusCode RunReference2(const Arguments &args, BuffersHost &buffers_host, Queue&) {
       cblasXherk(convertToCBLAS(args.layout),
                  convertToCBLAS(args.triangle),
                  convertToCBLAS(args.a_transpose),
                  args.n, args.k, args.alpha,
-                 a_mat_cpu, args.a_offset, args.a_ld, args.beta,
-                 c_mat_cpu, args.c_offset, args.c_ld);
-      buffers.c_mat.Write(queue, args.c_size, c_mat_cpu);
+                 buffers_host.a_mat, args.a_offset, args.a_ld, args.beta,
+                 buffers_host.c_mat, args.c_offset, args.c_ld);
       return StatusCode::kSuccess;
     }
   #endif
-- 
cgit v1.2.3