summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--include/clblast.h2
-rw-r--r--include/clblast_c.h2
-rw-r--r--include/internal/cache.h30
-rw-r--r--include/internal/clpp11.h25
-rw-r--r--include/internal/routine.h16
-rw-r--r--src/cache.cc56
-rw-r--r--src/clblast.cc4
-rw-r--r--src/clblast_c.cc6
-rw-r--r--src/routine.cc5
-rw-r--r--src/routines/level1/xamax.cc2
-rw-r--r--src/routines/level1/xasum.cc2
-rw-r--r--src/routines/level1/xaxpy.cc2
-rw-r--r--src/routines/level1/xcopy.cc2
-rw-r--r--src/routines/level1/xdot.cc2
-rw-r--r--src/routines/level1/xnrm2.cc2
-rw-r--r--src/routines/level1/xscal.cc2
-rw-r--r--src/routines/level1/xswap.cc2
-rw-r--r--src/routines/level2/xgemv.cc2
-rw-r--r--src/routines/level2/xger.cc2
-rw-r--r--src/routines/level2/xher.cc2
-rw-r--r--src/routines/level2/xher2.cc2
-rw-r--r--src/routines/level3/xgemm.cc2
-rw-r--r--src/routines/level3/xhemm.cc2
-rw-r--r--src/routines/level3/xher2k.cc2
-rw-r--r--src/routines/level3/xherk.cc2
-rw-r--r--src/routines/level3/xsymm.cc2
-rw-r--r--src/routines/level3/xsyr2k.cc2
-rw-r--r--src/routines/level3/xsyrk.cc2
-rw-r--r--src/routines/level3/xtrmm.cc2
30 files changed, 105 insertions, 83 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 787793f0..6dc1ed49 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -5,7 +5,7 @@ Development version (next release)
- Performance and correctness tests can now (on top of clBLAS) be performed against CPU BLAS libraries
- Fixed the use of events within the library
- Changed the enum parameters to match the raw values of the cblas standard
-- Added a function to clear the cache of previously compiled programs
+- Fixed the cache of previously compiled binaries and added a function to clear it
- Added level-1 routines:
* SNRM2/DNRM2/ScNRM2/DzNRM2
* SASUM/DASUM/ScASUM/DzASUM
diff --git a/include/clblast.h b/include/clblast.h
index 57fca119..e473adbe 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -556,7 +556,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
// for the same device. This cache can be cleared to free up system memory or in case of debugging.
-StatusCode ClearCompiledProgramCache();
+StatusCode ClearCache();
// =================================================================================================
diff --git a/include/clblast_c.h b/include/clblast_c.h
index e23f0305..45e50cff 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1074,7 +1074,7 @@ StatusCode PUBLIC_API CLBlastZtrsm(const Layout layout, const Side side, const T
// CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on
// for the same device. This cache can be cleared to free up system memory or in case of debugging.
-StatusCode PUBLIC_API CLBlastClearCompiledProgramCache();
+StatusCode PUBLIC_API CLBlastClearCache();
// =================================================================================================
diff --git a/include/internal/cache.h b/include/internal/cache.h
index 44fad68d..fa33b78f 100644
--- a/include/internal/cache.h
+++ b/include/internal/cache.h
@@ -24,9 +24,9 @@ namespace clblast {
namespace cache {
// =================================================================================================
-// The cache of compiled OpenCL programs, along with some meta-data
-struct ProgramCache {
- Program program;
+// The cache of compiled OpenCL binaries, along with some meta-data
+struct BinaryCache {
+ std::string binary;
std::string device_name;
Precision precision;
std::string routine_name_;
@@ -41,28 +41,28 @@ struct ProgramCache {
};
// The actual cache, implemented as a vector of the above data-type, and its mutex
-static std::vector<ProgramCache> program_cache_;
-static std::mutex program_cache_mutex_;
+static std::vector<BinaryCache> binary_cache_;
+static std::mutex binary_cache_mutex_;
// =================================================================================================
-// Stores the compiled program in the cache
-void StoreProgramToCache(const Program& program, const std::string &device_name,
- const Precision &precision, const std::string &routine_name);
+// Stores the compiled binary in the cache
+void StoreBinaryToCache(const std::string& binary, const std::string &device_name,
+ const Precision &precision, const std::string &routine_name);
-// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
+// Queries the cache and retrieves a matching binary. Assumes that the match is available, throws
// otherwise.
-const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name);
+const std::string& GetBinaryFromCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name);
// Queries the cache to see whether or not the compiled kernel is already there
-bool ProgramIsInCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name);
+bool BinaryIsInCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name);
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode ClearCompiledProgramCache();
+// Clears the cache of stored binaries
+StatusCode ClearCache();
// =================================================================================================
} // namespace cache
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h
index 543d423a..b865ab1e 100644
--- a/include/internal/clpp11.h
+++ b/include/internal/clpp11.h
@@ -283,7 +283,7 @@ class Program {
public:
// Note that there is no constructor based on the regular OpenCL data-type because of extra state
- // Regular constructor with memory management
+ // Source-based constructor with memory management
explicit Program(const Context &context, std::string source):
program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
length_(source.length()),
@@ -294,6 +294,22 @@ class Program {
CheckError(status);
}
+ // Binary-based constructor with memory management
+ explicit Program(const Device &device, const Context &context, const std::string& binary):
+ program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
+ length_(binary.length()),
+ source_(binary),
+ source_ptr_(&source_[0]) {
+ auto status1 = CL_SUCCESS;
+ auto status2 = CL_SUCCESS;
+ const cl_device_id dev = device();
+ *program_ = clCreateProgramWithBinary(context(), 1, &dev, &length_,
+ reinterpret_cast<const unsigned char**>(&source_ptr_),
+ &status1, &status2);
+ CheckError(status1);
+ CheckError(status2);
+ }
+
// Compiles the device program and returns whether or not there where any warnings/errors
BuildStatus Build(const Device &device, std::vector<std::string> &options) {
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
@@ -322,7 +338,7 @@ class Program {
return result;
}
- // Retrieves an intermediate representation of the compiled program
+ // Retrieves a binary or an intermediate representation of the compiled program
std::string GetIR() const {
auto bytes = size_t{0};
CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
@@ -338,7 +354,7 @@ class Program {
private:
std::shared_ptr<cl_program> program_;
size_t length_;
- std::string source_;
+ std::string source_; // Note: the source can also be a binary or IR
const char* source_ptr_;
};
@@ -633,7 +649,8 @@ class Kernel {
// Launches the kernel while waiting for other events
CheckError(clEnqueueNDRangeKernel(queue(), *kernel_, static_cast<cl_uint>(global.size()),
nullptr, global.data(), local.data(),
- waitForEventsPlain.size(), waitForEventsPlain.data(),
+ static_cast<cl_uint>(waitForEventsPlain.size()),
+ waitForEventsPlain.data(),
event));
}
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 013769d8..32be6012 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -84,19 +84,23 @@ class Routine {
const bool upper = false, const bool lower = false,
const bool diagonal_imag_zero = false);
- // Stores a newly compiled program into the cache
- void StoreProgramToCache(const Program& program) const {
- return cache::StoreProgramToCache(program, device_name_, precision_, routine_name_);
+ // Stores a newly compiled binary into the cache
+ void StoreBinaryToCache(const std::string& binary) const {
+ return cache::StoreBinaryToCache(binary, device_name_, precision_, routine_name_);
}
// Queries the cache and retrieve either a matching program or a boolean whether a match exists.
// The first assumes that the program is available in the cache and will throw an exception
// otherwise.
- const Program& GetProgramFromCache() const {
- return cache::GetProgramFromCache(device_name_, precision_, routine_name_);
+ Program GetProgramFromCache() const {
+ auto& binary = cache::GetBinaryFromCache(device_name_, precision_, routine_name_);
+ auto program = Program(device_, context_, binary);
+ auto options = std::vector<std::string>();
+ program.Build(device_, options);
+ return program;
}
bool ProgramIsInCache() const {
- return cache::ProgramIsInCache(device_name_, precision_, routine_name_);
+ return cache::BinaryIsInCache(device_name_, precision_, routine_name_);
}
// Non-static variable for the precision. Note that the same variable (but static) might exist in
diff --git a/src/cache.cc b/src/cache.cc
index beeb1b35..18731a51 100644
--- a/src/cache.cc
+++ b/src/cache.cc
@@ -21,50 +21,50 @@ namespace clblast {
namespace cache {
// =================================================================================================
-// Stores the compiled program in the cache
-void StoreProgramToCache(const Program& program, const std::string &device_name,
- const Precision &precision, const std::string &routine_name) {
- program_cache_mutex_.lock();
- program_cache_.push_back({program, device_name, precision, routine_name});
- program_cache_mutex_.unlock();
+// Stores the compiled binary or IR in the cache
+void StoreBinaryToCache(const std::string& binary, const std::string &device_name,
+ const Precision &precision, const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ binary_cache_.push_back({binary, device_name, precision, routine_name});
+ binary_cache_mutex_.unlock();
}
-// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
+// Queries the cache and retrieves a matching binary. Assumes that the match is available, throws
// otherwise.
-const Program& GetProgramFromCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name) {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name, precision, routine_name)) {
- program_cache_mutex_.unlock();
- return cached_program.program;
+const std::string& GetBinaryFromCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ for (auto &cached_binary: binary_cache_) {
+ if (cached_binary.MatchInCache(device_name, precision, routine_name)) {
+ binary_cache_mutex_.unlock();
+ return cached_binary.binary;
}
}
- program_cache_mutex_.unlock();
- throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none.");
+ binary_cache_mutex_.unlock();
+ throw std::runtime_error("Internal CLBlast error: Expected binary in cache, but found none.");
}
// Queries the cache to see whether or not the compiled kernel is already there
-bool ProgramIsInCache(const std::string &device_name, const Precision &precision,
- const std::string &routine_name) {
- program_cache_mutex_.lock();
- for (auto &cached_program: program_cache_) {
- if (cached_program.MatchInCache(device_name, precision, routine_name)) {
- program_cache_mutex_.unlock();
+bool BinaryIsInCache(const std::string &device_name, const Precision &precision,
+ const std::string &routine_name) {
+ binary_cache_mutex_.lock();
+ for (auto &cached_binary: binary_cache_) {
+ if (cached_binary.MatchInCache(device_name, precision, routine_name)) {
+ binary_cache_mutex_.unlock();
return true;
}
}
- program_cache_mutex_.unlock();
+ binary_cache_mutex_.unlock();
return false;
}
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode ClearCompiledProgramCache() {
- program_cache_mutex_.lock();
- program_cache_.clear();
- program_cache_mutex_.unlock();
+// Clears the cache of stored binaries
+StatusCode ClearCache() {
+ binary_cache_mutex_.lock();
+ binary_cache_.clear();
+ binary_cache_mutex_.unlock();
return StatusCode::kSuccess;
}
diff --git a/src/clblast.cc b/src/clblast.cc
index fac5a539..fe79d7c1 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -1854,8 +1854,8 @@ template StatusCode PUBLIC_API Trsm<double2>(const Layout, const Side, const Tri
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode ClearCompiledProgramCache() { return cache::ClearCompiledProgramCache(); }
+// Clears the cache of stored binaries
+StatusCode ClearCache() { return cache::ClearCache(); }
// =================================================================================================
} // namespace clblast
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 72d93c4b..172bce64 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -2343,9 +2343,9 @@ StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle tri
// =================================================================================================
-// Clears the cache of stored program binaries
-StatusCode CLBlastClearCompiledProgramCache() {
- return static_cast<StatusCode>(clblast::ClearCompiledProgramCache());
+// Clears the cache of stored binaries
+StatusCode CLBlastClearCache() {
+ return static_cast<StatusCode>(clblast::ClearCache());
}
// =================================================================================================
diff --git a/src/routine.cc b/src/routine.cc
index e0a75e41..cd4d82fb 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -96,8 +96,9 @@ StatusCode Routine<T>::SetUp() {
}
if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
- // Store the compiled program in the cache (atomic for thread-safety)
- StoreProgramToCache(program);
+ // Store the compiled kernel in the cache
+ auto binary = program.GetIR();
+ StoreBinaryToCache(binary);
} catch (...) { return StatusCode::kBuildProgramFailure; }
}
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index ffdfa496..33bd72a6 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -55,7 +55,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n,
// Retrieves the Xamax kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xamax");
auto kernel2 = Kernel(program, "XamaxEpilogue");
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index 5799e25a..ea33d7e1 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -55,7 +55,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n,
// Retrieves the Xasum kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xasum");
auto kernel2 = Kernel(program, "XasumEpilogue");
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 37d23543..96809a57 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -64,7 +64,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// Retrieves the Xaxpy kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 04508383..d34482ce 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -64,7 +64,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
// Retrieves the Xcopy kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index 4813a004..b2513485 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -59,7 +59,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
// Retrieves the Xdot kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xdot");
auto kernel2 = Kernel(program, "XdotEpilogue");
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index ceabe586..86166a0c 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -55,7 +55,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
// Retrieves the Xnrm2 kernels from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel1 = Kernel(program, "Xnrm2");
auto kernel2 = Kernel(program, "Xnrm2Epilogue");
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index e83e73fd..b92e2cdf 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -60,7 +60,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
// Retrieves the Xscal kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index bc425f40..bfc4a739 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -64,7 +64,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
// Retrieves the Xswap kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc
index 24e87db0..f8985038 100644
--- a/src/routines/level2/xgemv.cc
+++ b/src/routines/level2/xgemv.cc
@@ -136,7 +136,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index dda78232..686c7e60 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -66,7 +66,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xger");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc
index aba665b0..a7116213 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cc
@@ -79,7 +79,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher");
// Sets the kernel arguments
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc
index bcd6488f..3fd1a961 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cc
@@ -68,7 +68,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher2");
// Sets the kernel arguments
diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc
index 7557dcc3..aa081e81 100644
--- a/src/routines/level3/xgemm.cc
+++ b/src/routines/level3/xgemm.cc
@@ -107,7 +107,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == m_ceiled && a_two == k_ceiled && a_ld == m_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc
index c0a4306a..d2fbf36e 100644
--- a/src/routines/level3/xhemm.cc
+++ b/src/routines/level3/xhemm.cc
@@ -61,7 +61,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the hermitian-to-squared kernel
diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc
index 4d5a4d35..2c2c815d 100644
--- a/src/routines/level3/xher2k.cc
+++ b/src/routines/level3/xher2k.cc
@@ -93,7 +93,7 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc
index 574debe4..414c4760 100644
--- a/src/routines/level3/xherk.cc
+++ b/src/routines/level3/xherk.cc
@@ -90,7 +90,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc
index 914a326a..a39026f1 100644
--- a/src/routines/level3/xsymm.cc
+++ b/src/routines/level3/xsymm.cc
@@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the symmetric-to-squared kernel
diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc
index 44d0024e..3206c669 100644
--- a/src/routines/level3/xsyr2k.cc
+++ b/src/routines/level3/xsyr2k.cc
@@ -91,7 +91,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc
index 44ed8d35..741ad064 100644
--- a/src/routines/level3/xsyrk.cc
+++ b/src/routines/level3/xsyrk.cc
@@ -87,7 +87,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
try {
// Loads the program from the database
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
// Determines whether or not temporary matrices are needed
auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc
index 484cf040..9e3b27b4 100644
--- a/src/routines/level3/xtrmm.cc
+++ b/src/routines/level3/xtrmm.cc
@@ -63,7 +63,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the triangular matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto& program = GetProgramFromCache();
+ const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the triangular-to-squared kernel