diff options
author | Ivan Shapovalov <intelfx@intelfx.name> | 2016-11-26 20:53:42 +0300 |
---|---|---|
committer | Ivan Shapovalov <intelfx@intelfx.name> | 2017-01-24 11:56:15 +0300 |
commit | 5bcd92f2974d94e8add31816d3b9d48a42289500 (patch) | |
tree | 51b24e302a08d62058311ead32ab626ce4c11263 /src/routines/level3/xsymm.cpp | |
parent | e943fe77d64f42ed1e57c9919de8ca6787760f2b (diff) |
Routine, Cache: generalize, reduce amount of copying in fast path
Implement a generalized Cache<K, V>. Two variants are provided: the
first one is based on std::map, using C++14-specific transparent
std::less<> and generalized std::map::find() to allow searching by tuple
of references. The second one is based on std::vector and O(n) lookup,
but remains C++11-compliant.
Diffstat (limited to 'src/routines/level3/xsymm.cpp')
-rw-r--r-- | src/routines/level3/xsymm.cpp | 15 |
1 file changed, 7 insertions, 8 deletions
```diff
diff --git a/src/routines/level3/xsymm.cpp b/src/routines/level3/xsymm.cpp
index d7f771d1..969edfc8 100644
--- a/src/routines/level3/xsymm.cpp
+++ b/src/routines/level3/xsymm.cpp
@@ -30,12 +30,12 @@ Xsymm<T>::Xsymm(Queue &queue, EventPointer event, const std::string &name):
 // The main routine
 template <typename T>
 void Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle triangle,
-                            const size_t m, const size_t n,
-                            const T alpha,
-                            const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
-                            const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
-                            const T beta,
-                            const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
+                      const size_t m, const size_t n,
+                      const T alpha,
+                      const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+                      const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+                      const T beta,
+                      const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
 
   // Makes sure all dimensions are larger than zero
   if ((m == 0) || (n == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
@@ -58,8 +58,7 @@ void Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle trian
 
   // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
   // routine afterwards
-  const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-  auto kernel = Kernel(program, kernel_name);
+  auto kernel = Kernel(program_, kernel_name);
 
   // Sets the arguments for the symmetric-to-squared kernel
   kernel.SetArgument(0, static_cast<int>(k));
```