From bf50c4e53e1815d4b376f35b5be5c747cd857414 Mon Sep 17 00:00:00 2001
From: Tarmo Räntilä
Date: Mon, 9 Dec 2019 22:13:52 +0200
Subject: Reduce TestMatrix calls for xgemmbatched.

Replace the looped test by a single one with the maximal found offset.
---
 src/routines/levelx/xgemmbatched.cpp |  8 +++-----
 src/utilities/buffer_test.hpp        | 29 +++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/routines/levelx/xgemmbatched.cpp b/src/routines/levelx/xgemmbatched.cpp
index b12b8734..0d7ae5ab 100644
--- a/src/routines/levelx/xgemmbatched.cpp
+++ b/src/routines/levelx/xgemmbatched.cpp
@@ -79,11 +79,9 @@ void XgemmBatched<T>::DoGemmBatched(const Layout layout, const Transpose a_trans
                              gemm_kernel_id);
 
   // Tests the matrices for validity
-  for (auto batch = size_t{0}; batch < batch_count; ++batch) {
-    TestMatrixA(a_one, a_two, a_buffer, a_offsets[batch], a_ld, false); // don't test for invalid LD
-    TestMatrixB(b_one, b_two, b_buffer, b_offsets[batch], b_ld, false); // don't test for invalid LD
-    TestMatrixC(c_one, c_two, c_buffer, c_offsets[batch], c_ld);
-  }
+  TestBatchedMatrixA(a_one, a_two, a_buffer, a_offsets, a_ld, false); // don't test for invalid LD
+  TestBatchedMatrixB(b_one, b_two, b_buffer, b_offsets, b_ld, false); // don't test for invalid LD
+  TestBatchedMatrixC(c_one, c_two, c_buffer, c_offsets, c_ld);
 
   // Upload the scalar arguments to the device
   auto alphas_device = Buffer<T>(context_, BufferAccess::kReadWrite, batch_count);
diff --git a/src/utilities/buffer_test.hpp b/src/utilities/buffer_test.hpp
index fd071434..9cecce97 100644
--- a/src/utilities/buffer_test.hpp
+++ b/src/utilities/buffer_test.hpp
@@ -15,6 +15,9 @@
 #ifndef CLBLAST_BUFFER_TEST_H_
 #define CLBLAST_BUFFER_TEST_H_
 
+#include <algorithm>
+#include <vector>
+
 #include "utilities/utilities.hpp"
 
 namespace clblast {
@@ -104,6 +107,32 @@ void TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offse
   } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorScalar,
e.what()); }
 }
 
+// =================================================================================================
+
+// Tests matrix 'A' for validity in a batched setting: a single test, using the largest offset
+template <typename T>
+void TestBatchedMatrixA(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t> &offsets, const size_t ld, const bool test_lead_dim = true) {
+  if (offsets.empty()) { return; } // no batches: nothing to test, and max_element would return end()
+  TestMatrixA(one, two, buffer, *std::max_element(offsets.begin(), offsets.end()), ld, test_lead_dim);
+}
+
+// Tests matrix 'B' for validity in a batched setting: a single test, using the largest offset
+template <typename T>
+void TestBatchedMatrixB(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t>& offsets, const size_t ld, const bool test_lead_dim = true) {
+  if (offsets.empty()) { return; } // no batches: nothing to test, and max_element would return end()
+  TestMatrixB(one, two, buffer, *std::max_element(offsets.begin(), offsets.end()), ld, test_lead_dim);
+}
+
+// Tests matrix 'C' for validity in a batched setting: a single test, using the largest offset
+template <typename T>
+void TestBatchedMatrixC(const size_t one, const size_t two, const Buffer<T>& buffer,
+                        const std::vector<size_t>& offsets, const size_t ld) {
+  if (offsets.empty()) { return; } // no batches: nothing to test, and max_element would return end()
+  TestMatrixC(one, two, buffer, *std::max_element(offsets.begin(), offsets.end()), ld);
+}
+
 // =================================================================================================
 
 } // namespace clblast
-- 
cgit v1.2.3