diff options
author | Ivan Shapovalov <intelfx@intelfx.name> | 2016-11-26 20:53:42 +0300 |
---|---|---|
committer | Ivan Shapovalov <intelfx@intelfx.name> | 2017-01-24 11:56:15 +0300 |
commit | 5bcd92f2974d94e8add31816d3b9d48a42289500 (patch) | |
tree | 51b24e302a08d62058311ead32ab626ce4c11263 /src/routines/level1 | |
parent | e943fe77d64f42ed1e57c9919de8ca6787760f2b (diff) |
Routine, Cache: generalize, reduce amount of copying in fast path
Implement a generalized Cache<K, V>. Two variants are provided. The
first is based on std::map and uses the C++14 transparent comparator
std::less<> together with the heterogeneous std::map::find() overload
to allow searching by a tuple of references. The second is based on
std::vector with O(n) lookup, but remains C++11-compliant.
Diffstat (limited to 'src/routines/level1')
-rw-r--r-- | src/routines/level1/xamax.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xasum.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xaxpy.cpp | 3 | ||||
-rw-r--r-- | src/routines/level1/xcopy.cpp | 3 | ||||
-rw-r--r-- | src/routines/level1/xdot.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xnrm2.cpp | 5 | ||||
-rw-r--r-- | src/routines/level1/xscal.cpp | 3 | ||||
-rw-r--r-- | src/routines/level1/xswap.cpp | 3 |
8 files changed, 12 insertions, 20 deletions
diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp index e9efa1a7..40a66517 100644 --- a/src/routines/level1/xamax.cpp +++ b/src/routines/level1/xamax.cpp @@ -43,9 +43,8 @@ void Xamax<T>::DoAmax(const size_t n, TestVectorIndex(1, imax_buffer, imax_offset); // Retrieves the Xamax kernels from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel1 = Kernel(program, "Xamax"); - auto kernel2 = Kernel(program, "XamaxEpilogue"); + auto kernel1 = Kernel(program_, "Xamax"); + auto kernel2 = Kernel(program_, "XamaxEpilogue"); // Creates the buffer for intermediate values auto temp_size = 2*db_["WGS2"]; diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp index a242a5fa..b93b271c 100644 --- a/src/routines/level1/xasum.cpp +++ b/src/routines/level1/xasum.cpp @@ -43,9 +43,8 @@ void Xasum<T>::DoAsum(const size_t n, TestVectorScalar(1, asum_buffer, asum_offset); // Retrieves the Xasum kernels from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel1 = Kernel(program, "Xasum"); - auto kernel2 = Kernel(program, "XasumEpilogue"); + auto kernel1 = Kernel(program_, "Xasum"); + auto kernel2 = Kernel(program_, "XasumEpilogue"); // Creates the buffer for intermediate values auto temp_size = 2*db_["WGS2"]; diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp index 5436c5b7..39f61ef4 100644 --- a/src/routines/level1/xaxpy.cpp +++ b/src/routines/level1/xaxpy.cpp @@ -52,8 +52,7 @@ void Xaxpy<T>::DoAxpy(const size_t n, const T alpha, auto kernel_name = (use_fast_kernel) ? 
"XaxpyFast" : "Xaxpy"; // Retrieves the Xaxpy kernel from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments if (use_fast_kernel) { diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp index d86200c0..62889764 100644 --- a/src/routines/level1/xcopy.cpp +++ b/src/routines/level1/xcopy.cpp @@ -52,8 +52,7 @@ void Xcopy<T>::DoCopy(const size_t n, auto kernel_name = (use_fast_kernel) ? "XcopyFast" : "Xcopy"; // Retrieves the Xcopy kernel from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments if (use_fast_kernel) { diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp index 9d718913..9f9c0590 100644 --- a/src/routines/level1/xdot.cpp +++ b/src/routines/level1/xdot.cpp @@ -46,9 +46,8 @@ void Xdot<T>::DoDot(const size_t n, TestVectorScalar(1, dot_buffer, dot_offset); // Retrieves the Xdot kernels from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel1 = Kernel(program, "Xdot"); - auto kernel2 = Kernel(program, "XdotEpilogue"); + auto kernel1 = Kernel(program_, "Xdot"); + auto kernel2 = Kernel(program_, "XdotEpilogue"); // Creates the buffer for intermediate values auto temp_size = 2*db_["WGS2"]; diff --git a/src/routines/level1/xnrm2.cpp b/src/routines/level1/xnrm2.cpp index 373820a4..aa341aff 100644 --- a/src/routines/level1/xnrm2.cpp +++ b/src/routines/level1/xnrm2.cpp @@ -43,9 +43,8 @@ void Xnrm2<T>::DoNrm2(const size_t n, TestVectorScalar(1, nrm2_buffer, nrm2_offset); // Retrieves the Xnrm2 kernels from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), 
routine_name_); - auto kernel1 = Kernel(program, "Xnrm2"); - auto kernel2 = Kernel(program, "Xnrm2Epilogue"); + auto kernel1 = Kernel(program_, "Xnrm2"); + auto kernel2 = Kernel(program_, "Xnrm2Epilogue"); // Creates the buffer for intermediate values auto temp_size = 2*db_["WGS2"]; diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp index 0521b1e5..9bc096e5 100644 --- a/src/routines/level1/xscal.cpp +++ b/src/routines/level1/xscal.cpp @@ -49,8 +49,7 @@ void Xscal<T>::DoScal(const size_t n, const T alpha, auto kernel_name = (use_fast_kernel) ? "XscalFast" : "Xscal"; // Retrieves the Xscal kernel from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments if (use_fast_kernel) { diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp index c9b97dc9..f046575f 100644 --- a/src/routines/level1/xswap.cpp +++ b/src/routines/level1/xswap.cpp @@ -52,8 +52,7 @@ void Xswap<T>::DoSwap(const size_t n, auto kernel_name = (use_fast_kernel) ? "XswapFast" : "Xswap"; // Retrieves the Xswap kernel from the compiled binary - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); + auto kernel = Kernel(program_, kernel_name); // Sets the kernel arguments if (use_fast_kernel) { |