summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 1
-rw-r--r-- include/internal/clpp11.h 31
-rw-r--r-- include/internal/routine.h 4
-rw-r--r-- src/routine.cc 2
-rw-r--r-- src/routines/xaxpy.cc 2
-rw-r--r-- src/routines/xgemm.cc 2
-rw-r--r-- src/routines/xgemv.cc 2
-rw-r--r-- src/routines/xsymm.cc 2
8 files changed, 15 insertions, 31 deletions
diff --git a/CHANGELOG b/CHANGELOG
index b462dd37..7a8bfb54 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,7 @@
Development version (next release)
- Added support for complex conjugate transpose
+- Some host-code performance improvements
- Added level-2 routines:
SGEMV/DGEMV/CGEMV/ZGEMV
- Added level-3 routines:
diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h
index 73040fdb..d48b646d 100644
--- a/include/internal/clpp11.h
+++ b/include/internal/clpp11.h
@@ -134,8 +134,7 @@ class Platform: public Object {
}
// Accessors to the private data-member
- cl_platform_id operator()() const { return platform_; }
- cl_platform_id& operator()() { return platform_; }
+ const cl_platform_id& operator()() const { return platform_; }
private:
cl_platform_id platform_;
};
@@ -193,8 +192,7 @@ class Device: public Object {
}
// Accessors to the private data-member
- cl_device_id operator()() const { return device_; }
- cl_device_id& operator()() { return device_; }
+ const cl_device_id& operator()() const { return device_; }
private:
// Helper functions
@@ -259,8 +257,7 @@ class Context: public ObjectWithState {
}
// Accessors to the private data-member
- cl_context operator()() const { return context_; }
- cl_context& operator()() { return context_; }
+ const cl_context& operator()() const { return context_; }
private:
cl_context context_;
};
@@ -296,16 +293,6 @@ class Program: public ObjectWithState {
swap(*this, other);
return *this;
}
- /*
- TODO: Implement move construction/assignment?
- Program(Program &&other) {
- clRetainProgram(program_);
- swap(*this, other);
- }
- Program& operator=(Program &&other) {
- swap(*this, other);
- return *this;
- }*/
friend void swap(Program &first, Program &second) {
std::swap(first.length_, second.length_);
std::swap(first.source_, second.source_);
@@ -327,8 +314,7 @@ class Program: public ObjectWithState {
}
// Accessors to the private data-member
- cl_program operator()() const { return program_; }
- cl_program& operator()() { return program_; }
+ const cl_program& operator()() const { return program_; }
private:
size_t length_;
std::vector<char> source_;
@@ -382,8 +368,7 @@ class Kernel: public ObjectWithState {
}
// Accessors to the private data-member
- cl_kernel operator()() const { return kernel_; }
- cl_kernel& operator()() { return kernel_; }
+ const cl_kernel& operator()() const { return kernel_; }
private:
cl_kernel kernel_;
};
@@ -445,8 +430,7 @@ class CommandQueue: public ObjectWithState {
}
// Accessors to the private data-member
- cl_command_queue operator()() const { return queue_; }
- cl_command_queue& operator()() { return queue_; }
+ const cl_command_queue& operator()() const { return queue_; }
private:
cl_command_queue queue_;
};
@@ -511,8 +495,7 @@ class Buffer: public ObjectWithState {
}
// Accessors to the private data-member
- cl_mem operator()() const { return buffer_; }
- cl_mem& operator()() { return buffer_; }
+ const cl_mem& operator()() const { return buffer_; }
private:
cl_mem buffer_;
};
diff --git a/include/internal/routine.h b/include/internal/routine.h
index 4482d23b..1b2e0dbb 100644
--- a/include/internal/routine.h
+++ b/include/internal/routine.h
@@ -33,7 +33,7 @@ class Routine {
const std::string kKhronosHalfPrecision = "cl_khr_fp16";
const std::string kKhronosDoublePrecision = "cl_khr_fp64";
- // New data-type:tThe cache of compiled OpenCL programs, along with some meta-data
+ // The cache of compiled OpenCL programs, along with some meta-data
struct ProgramCache {
Program program;
std::string device_name;
@@ -101,7 +101,7 @@ class Routine {
// Queries the cache and retrieve either a matching program or a boolean whether a match exists.
// The first assumes that the program is available in the cache and will throw an exception
// otherwise.
- Program GetProgramFromCache() const;
+ const Program& GetProgramFromCache() const;
bool ProgramIsInCache() const;
// Non-static variable for the precision. Note that the same variable (but static) might exist in
diff --git a/src/routine.cc b/src/routine.cc
index 064db754..a4e0bb37 100644
--- a/src/routine.cc
+++ b/src/routine.cc
@@ -308,7 +308,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr
// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
// otherwise.
-Program Routine::GetProgramFromCache() const {
+const Program& Routine::GetProgramFromCache() const {
for (auto &cached_program: program_cache_) {
if (cached_program.MatchInCache(device_name_, precision_, routines_)) {
return cached_program.program;
diff --git a/src/routines/xaxpy.cc b/src/routines/xaxpy.cc
index d77bf07e..b68458da 100644
--- a/src/routines/xaxpy.cc
+++ b/src/routines/xaxpy.cc
@@ -60,7 +60,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// Retrieves the Xaxpy kernel from the compiled binary
try {
- auto program = GetProgramFromCache();
+ auto& program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/xgemm.cc b/src/routines/xgemm.cc
index db10899c..20cd2675 100644
--- a/src/routines/xgemm.cc
+++ b/src/routines/xgemm.cc
@@ -102,7 +102,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
auto temp_c = Buffer(context_, CL_MEM_READ_WRITE, m_ceiled*n_ceiled*sizeof(T));
// Loads the program from the database
- auto program = GetProgramFromCache();
+ auto& program = GetProgramFromCache();
// Runs the pre-processing kernels. This transposes the matrices, but also pads zeros to fill
// them up until they reach a certain multiple of size (kernel parameter dependent).
diff --git a/src/routines/xgemv.cc b/src/routines/xgemv.cc
index 78071c17..1868dec4 100644
--- a/src/routines/xgemv.cc
+++ b/src/routines/xgemv.cc
@@ -100,7 +100,7 @@ StatusCode Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose,
// Retrieves the Xgemv kernel from the compiled binary
try {
- auto program = GetProgramFromCache();
+ auto& program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the kernel arguments
diff --git a/src/routines/xsymm.cc b/src/routines/xsymm.cc
index aa43593d..97f35be8 100644
--- a/src/routines/xsymm.cc
+++ b/src/routines/xsymm.cc
@@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
// Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
// routine afterwards
try {
- auto program = GetProgramFromCache();
+ auto& program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
// Sets the arguments for the symmetric-to-squared kernel