diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-02-02 21:18:37 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-02-02 21:18:37 +0100 |
commit | 69ed46c8da69ee18338eca5102ead43410cc01b5 (patch) | |
tree | 575c4eef2a3210117a574f25f81662c503ec207d /test | |
parent | ae66782eabc574a507b8cfe2b83f2df23b1a36c1 (diff) |
Implemented the XHAD Hadamard product routine
Diffstat (limited to 'test')
-rw-r--r-- | test/routines/levelx/xhad.hpp | 48 |
1 files changed, 33 insertions, 15 deletions
diff --git a/test/routines/levelx/xhad.hpp b/test/routines/levelx/xhad.hpp index fc47a7d6..3e40de87 100644 --- a/test/routines/levelx/xhad.hpp +++ b/test/routines/levelx/xhad.hpp @@ -21,12 +21,43 @@ namespace clblast { // ================================================================================================= +template <typename T> +StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) { + for (auto index = size_t{0}; index < args.n; ++index) { + const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset]; + const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset]; + const auto z = buffers_host.c_mat[index]; // * args.z_inc + args.z_offset]; + buffers_host.c_mat[index] = args.alpha * x * y + args.beta * z; + } + return StatusCode::kSuccess; +} + +// Half-precision version calling the above reference implementation after conversions +template <> +StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) { + auto x_buffer2 = HalfToFloatBuffer(buffers_host.x_vec); + auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec); + auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat); + auto dummy = std::vector<float>(0); + auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy}; + auto args2 = Arguments<float>(); + args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size; + args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n; + args2.x_offset = args.x_offset; args2.y_offset = args.y_offset; + args2.alpha = HalfToFloat(args.alpha); args2.beta = HalfToFloat(args.beta); + auto status = RunReference(args2, buffers2); + FloatToHalfBuffer(buffers_host.c_mat, buffers2.c_mat); + return status; +} + +// ================================================================================================= + // See comment at top of file for a description of the class template <typename T> class TestXhad { public: - // The BLAS level: 4 for the extra routines + // The BLAS level: 4 for the extra routines (note: tested with matrix-size values for 'n') static size_t BLASLevel() { return 4; } // The list of arguments relevant for this routine @@ -34,7 +65,7 @@ public: return {kArgN, kArgXInc, kArgYInc, kArgXOffset, kArgYOffset, - kArgAlpha}; + kArgAlpha, kArgBeta}; } static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufMatC}; } static std::vector<std::string> BuffersOut() { return {kBufMatC}; } @@ -135,19 +166,6 @@ public: }; // ================================================================================================= - -template <typename T> -StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) { - for (auto index = size_t{0}; index < args.n; ++index) { - const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset]; - const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset]; - const auto z = buffers_host.c_mat[index]; // * args.z_inc + args.z_offset]; - buffers_host.c_mat[index] = x * y * args.alpha + z * args.beta; - } - return StatusCode::kSuccess; -} - -// ================================================================================================= } // namespace clblast // CLBLAST_TEST_ROUTINES_XHAD_H_ |