summaryrefslogtreecommitdiff
path: root/src/routines/level1
diff options
context:
space:
mode:
authorIvan Shapovalov <intelfx@intelfx.name>2016-11-26 20:53:42 +0300
committerIvan Shapovalov <intelfx@intelfx.name>2017-01-24 11:56:15 +0300
commit5bcd92f2974d94e8add31816d3b9d48a42289500 (patch)
tree51b24e302a08d62058311ead32ab626ce4c11263 /src/routines/level1
parente943fe77d64f42ed1e57c9919de8ca6787760f2b (diff)
Routine, Cache: generalize, reduce amount of copying in fast path
Implement a generalized Cache<K, V>. Two variants are provided: the first one is based on std::map, using C++14-specific transparent std::less<> and generalized std::map::find() to allow searching by tuple of references. The second one is based on std::vector and O(n) lookup, but remains C++11-compliant.
Diffstat (limited to 'src/routines/level1')
-rw-r--r--src/routines/level1/xamax.cpp5
-rw-r--r--src/routines/level1/xasum.cpp5
-rw-r--r--src/routines/level1/xaxpy.cpp3
-rw-r--r--src/routines/level1/xcopy.cpp3
-rw-r--r--src/routines/level1/xdot.cpp5
-rw-r--r--src/routines/level1/xnrm2.cpp5
-rw-r--r--src/routines/level1/xscal.cpp3
-rw-r--r--src/routines/level1/xswap.cpp3
8 files changed, 12 insertions, 20 deletions
diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp
index e9efa1a7..40a66517 100644
--- a/src/routines/level1/xamax.cpp
+++ b/src/routines/level1/xamax.cpp
@@ -43,9 +43,8 @@ void Xamax<T>::DoAmax(const size_t n,
TestVectorIndex(1, imax_buffer, imax_offset);
// Retrieves the Xamax kernels from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xamax");
- auto kernel2 = Kernel(program, "XamaxEpilogue");
+ auto kernel1 = Kernel(program_, "Xamax");
+ auto kernel2 = Kernel(program_, "XamaxEpilogue");
// Creates the buffer for intermediate values
auto temp_size = 2*db_["WGS2"];
diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp
index a242a5fa..b93b271c 100644
--- a/src/routines/level1/xasum.cpp
+++ b/src/routines/level1/xasum.cpp
@@ -43,9 +43,8 @@ void Xasum<T>::DoAsum(const size_t n,
TestVectorScalar(1, asum_buffer, asum_offset);
// Retrieves the Xasum kernels from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xasum");
- auto kernel2 = Kernel(program, "XasumEpilogue");
+ auto kernel1 = Kernel(program_, "Xasum");
+ auto kernel2 = Kernel(program_, "XasumEpilogue");
// Creates the buffer for intermediate values
auto temp_size = 2*db_["WGS2"];
diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp
index 5436c5b7..39f61ef4 100644
--- a/src/routines/level1/xaxpy.cpp
+++ b/src/routines/level1/xaxpy.cpp
@@ -52,8 +52,7 @@ void Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy";
// Retrieves the Xaxpy kernel from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
if (use_fast_kernel) {
diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp
index d86200c0..62889764 100644
--- a/src/routines/level1/xcopy.cpp
+++ b/src/routines/level1/xcopy.cpp
@@ -52,8 +52,7 @@ void Xcopy<T>::DoCopy(const size_t n,
auto kernel_name = (use_fast_kernel) ? "XcopyFast" : "Xcopy";
// Retrieves the Xcopy kernel from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
if (use_fast_kernel) {
diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp
index 9d718913..9f9c0590 100644
--- a/src/routines/level1/xdot.cpp
+++ b/src/routines/level1/xdot.cpp
@@ -46,9 +46,8 @@ void Xdot<T>::DoDot(const size_t n,
TestVectorScalar(1, dot_buffer, dot_offset);
// Retrieves the Xdot kernels from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xdot");
- auto kernel2 = Kernel(program, "XdotEpilogue");
+ auto kernel1 = Kernel(program_, "Xdot");
+ auto kernel2 = Kernel(program_, "XdotEpilogue");
// Creates the buffer for intermediate values
auto temp_size = 2*db_["WGS2"];
diff --git a/src/routines/level1/xnrm2.cpp b/src/routines/level1/xnrm2.cpp
index 373820a4..aa341aff 100644
--- a/src/routines/level1/xnrm2.cpp
+++ b/src/routines/level1/xnrm2.cpp
@@ -43,9 +43,8 @@ void Xnrm2<T>::DoNrm2(const size_t n,
TestVectorScalar(1, nrm2_buffer, nrm2_offset);
// Retrieves the Xnrm2 kernels from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xnrm2");
- auto kernel2 = Kernel(program, "Xnrm2Epilogue");
+ auto kernel1 = Kernel(program_, "Xnrm2");
+ auto kernel2 = Kernel(program_, "Xnrm2Epilogue");
// Creates the buffer for intermediate values
auto temp_size = 2*db_["WGS2"];
diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp
index 0521b1e5..9bc096e5 100644
--- a/src/routines/level1/xscal.cpp
+++ b/src/routines/level1/xscal.cpp
@@ -49,8 +49,7 @@ void Xscal<T>::DoScal(const size_t n, const T alpha,
auto kernel_name = (use_fast_kernel) ? "XscalFast" : "Xscal";
// Retrieves the Xscal kernel from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
if (use_fast_kernel) {
diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp
index c9b97dc9..f046575f 100644
--- a/src/routines/level1/xswap.cpp
+++ b/src/routines/level1/xswap.cpp
@@ -52,8 +52,7 @@ void Xswap<T>::DoSwap(const size_t n,
auto kernel_name = (use_fast_kernel) ? "XswapFast" : "Xswap";
// Retrieves the Xswap kernel from the compiled binary
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
+ auto kernel = Kernel(program_, kernel_name);
// Sets the kernel arguments
if (use_fast_kernel) {