From 69ed46c8da69ee18338eca5102ead43410cc01b5 Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Fri, 2 Feb 2018 21:18:37 +0100
Subject: Implemented the XHAD Hadamard product routine

---
 test/routines/levelx/xhad.hpp | 48 +++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 15 deletions(-)

(limited to 'test')

diff --git a/test/routines/levelx/xhad.hpp b/test/routines/levelx/xhad.hpp
index fc47a7d6..3e40de87 100644
--- a/test/routines/levelx/xhad.hpp
+++ b/test/routines/levelx/xhad.hpp
@@ -21,12 +21,43 @@
 namespace clblast {
 // =================================================================================================
 
+template <typename T>
+StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
+  for (auto index = size_t{0}; index < args.n; ++index) {
+    const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset];
+    const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset];
+    const auto z = buffers_host.c_mat[index]; // * args.z_inc + args.z_offset];
+    buffers_host.c_mat[index] = args.alpha * x * y + args.beta * z;
+  }
+  return StatusCode::kSuccess;
+}
+
+// Half-precision version calling the above reference implementation after conversions
+template <>
+StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
+  auto x_buffer2 = HalfToFloatBuffer(buffers_host.x_vec);
+  auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec);
+  auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
+  auto dummy = std::vector<float>(0);
+  auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy};
+  auto args2 = Arguments<float>();
+  args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size;
+  args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n;
+  args2.x_offset = args.x_offset; args2.y_offset = args.y_offset;
+  args2.alpha = HalfToFloat(args.alpha); args2.beta = HalfToFloat(args.beta);
+  auto status = RunReference(args2, buffers2);
+  FloatToHalfBuffer(buffers_host.c_mat, buffers2.c_mat);
+  return status;
+}
+
+// =================================================================================================
+
 // See comment at top of file for a description of the class
 template <typename T>
 class TestXhad {
 public:
 
-  // The BLAS level: 4 for the extra routines
+  // The BLAS level: 4 for the extra routines (note: tested with matrix-size values for 'n')
   static size_t BLASLevel() { return 4; }
 
   // The list of arguments relevant for this routine
@@ -34,7 +65,7 @@ public:
     return {kArgN,
             kArgXInc, kArgYInc,
             kArgXOffset, kArgYOffset,
-            kArgAlpha};
+            kArgAlpha, kArgBeta};
   }
   static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufMatC}; }
   static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
@@ -134,19 +165,6 @@ public:
   }
 };
 
-// =================================================================================================
-
-template <typename T>
-StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
-  for (auto index = size_t{0}; index < args.n; ++index) {
-    const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset];
-    const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset];
-    const auto z = buffers_host.c_mat[index]; // * args.z_inc + args.z_offset];
-    buffers_host.c_mat[index] = x * y * args.alpha + z * args.beta;
-  }
-  return StatusCode::kSuccess;
-}
-
 // =================================================================================================
 } // namespace clblast
 
-- 
cgit v1.2.3