From 5bcd92f2974d94e8add31816d3b9d48a42289500 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sat, 26 Nov 2016 20:53:42 +0300 Subject: Routine, Cache: generalize, reduce amount of copying in fast path Implement a generalized Cache. Two variants are provided: the first one is based on std::map, using C++14-specific transparent std::less<> and generalized std::map::find() to allow searching by tuple of references. The second one is based on std::vector and O(n) lookup, but remains C++11-compliant. --- src/routines/level3/xsymm.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/routines/level3/xsymm.cpp') diff --git a/src/routines/level3/xsymm.cpp b/src/routines/level3/xsymm.cpp index d7f771d1..969edfc8 100644 --- a/src/routines/level3/xsymm.cpp +++ b/src/routines/level3/xsymm.cpp @@ -30,12 +30,12 @@ Xsymm<T>::Xsymm(Queue &queue, EventPointer event, const std::string &name): // The main routine template <typename T> void Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle triangle, - const size_t m, const size_t n, - const T alpha, - const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, - const T beta, - const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld, + const T beta, + const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) { // Makes sure all dimensions are larger than zero if ((m == 0) || (n == 0) ) { throw BLASError(StatusCode::kInvalidDimension); } @@ -58,8 +58,7 @@ void Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle trian // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), 
routine_name_); - auto kernel = Kernel(program, kernel_name); + auto kernel = Kernel(program_, kernel_name); // Sets the arguments for the symmetric-to-squared kernel kernel.SetArgument(0, static_cast<int>(k)); -- cgit v1.2.3