diff options
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | include/internal/clpp11.h | 31 | ||||
-rw-r--r-- | include/internal/routine.h | 4 | ||||
-rw-r--r-- | src/routine.cc | 2 | ||||
-rw-r--r-- | src/routines/xaxpy.cc | 2 | ||||
-rw-r--r-- | src/routines/xgemm.cc | 2 | ||||
-rw-r--r-- | src/routines/xgemv.cc | 2 | ||||
-rw-r--r-- | src/routines/xsymm.cc | 2 |
8 files changed, 15 insertions, 31 deletions
@@ -1,6 +1,7 @@ Development version (next release) - Added support for complex conjugate transpose +- Some host-code performance improvements - Added level-2 routines: SGEMV/DGEMV/CGEMV/ZGEMV - Added level-3 routines: diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h index 73040fdb..d48b646d 100644 --- a/include/internal/clpp11.h +++ b/include/internal/clpp11.h @@ -134,8 +134,7 @@ class Platform: public Object { } // Accessors to the private data-member - cl_platform_id operator()() const { return platform_; } - cl_platform_id& operator()() { return platform_; } + const cl_platform_id& operator()() const { return platform_; } private: cl_platform_id platform_; }; @@ -193,8 +192,7 @@ class Device: public Object { } // Accessors to the private data-member - cl_device_id operator()() const { return device_; } - cl_device_id& operator()() { return device_; } + const cl_device_id& operator()() const { return device_; } private: // Helper functions @@ -259,8 +257,7 @@ class Context: public ObjectWithState { } // Accessors to the private data-member - cl_context operator()() const { return context_; } - cl_context& operator()() { return context_; } + const cl_context& operator()() const { return context_; } private: cl_context context_; }; @@ -296,16 +293,6 @@ class Program: public ObjectWithState { swap(*this, other); return *this; } - /* - TODO: Implement move construction/assignment? - Program(Program &&other) { - clRetainProgram(program_); - swap(*this, other); - } - Program& operator=(Program &&other) { - swap(*this, other); - return *this; - }*/ friend void swap(Program &first, Program &second) { std::swap(first.length_, second.length_); std::swap(first.source_, second.source_); @@ -327,8 +314,7 @@ class Program: public ObjectWithState { } // Accessors to the private data-member - cl_program operator()() const { return program_; } - cl_program& operator()() { return program_; } + const cl_program& operator()() const { return program_; } private: size_t length_; std::vector<char> source_; @@ -382,8 +368,7 @@ class Kernel: public ObjectWithState { } // Accessors to the private data-member - cl_kernel operator()() const { return kernel_; } - cl_kernel& operator()() { return kernel_; } + const cl_kernel& operator()() const { return kernel_; } private: cl_kernel kernel_; }; @@ -445,8 +430,7 @@ class CommandQueue: public ObjectWithState { } // Accessors to the private data-member - cl_command_queue operator()() const { return queue_; } - cl_command_queue& operator()() { return queue_; } + const cl_command_queue& operator()() const { return queue_; } private: cl_command_queue queue_; }; @@ -511,8 +495,7 @@ class Buffer: public ObjectWithState { } // Accessors to the private data-member - cl_mem operator()() const { return buffer_; } - cl_mem& operator()() { return buffer_; } + const cl_mem& operator()() const { return buffer_; } private: cl_mem buffer_; }; diff --git a/include/internal/routine.h b/include/internal/routine.h index 4482d23b..1b2e0dbb 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -33,7 +33,7 @@ class Routine { const std::string kKhronosHalfPrecision = "cl_khr_fp16"; const std::string kKhronosDoublePrecision = "cl_khr_fp64"; - // New data-type:tThe cache of compiled OpenCL programs, along with some meta-data + // The cache of compiled OpenCL programs, along with some meta-data struct ProgramCache { Program program; std::string device_name; @@ -101,7 +101,7 @@ class Routine { // Queries the cache and retrieve either a matching program or a boolean whether a match exists. // The first assumes that the program is available in the cache and will throw an exception // otherwise. - Program GetProgramFromCache() const; + const Program& GetProgramFromCache() const; bool ProgramIsInCache() const; // Non-static variable for the precision. Note that the same variable (but static) might exist in diff --git a/src/routine.cc b/src/routine.cc index 064db754..a4e0bb37 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -308,7 +308,7 @@ StatusCode Routine::PadCopyTransposeMatrix(const size_t src_one, const size_t sr // Queries the cache and retrieves a matching program. Assumes that the match is available, throws // otherwise. -Program Routine::GetProgramFromCache() const { +const Program& Routine::GetProgramFromCache() const { for (auto &cached_program: program_cache_) { if (cached_program.MatchInCache(device_name_, precision_, routines_)) { return cached_program.program; diff --git a/src/routines/xaxpy.cc b/src/routines/xaxpy.cc index d77bf07e..b68458da 100644 --- a/src/routines/xaxpy.cc +++ b/src/routines/xaxpy.cc @@ -60,7 +60,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, // Retrieves the Xaxpy kernel from the compiled binary try { - auto program = GetProgramFromCache(); + auto& program = GetProgramFromCache(); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/xgemm.cc b/src/routines/xgemm.cc index db10899c..20cd2675 100644 --- a/src/routines/xgemm.cc +++ b/src/routines/xgemm.cc @@ -102,7 +102,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, auto temp_c = Buffer(context_, CL_MEM_READ_WRITE, m_ceiled*n_ceiled*sizeof(T)); // Loads the program from the database - auto program = GetProgramFromCache(); + auto& program = GetProgramFromCache(); // Runs the pre-processing kernels. This transposes the matrices, but also pads zeros to fill // them up until they reach a certain multiple of size (kernel parameter dependent). diff --git a/src/routines/xgemv.cc b/src/routines/xgemv.cc index 78071c17..1868dec4 100644 --- a/src/routines/xgemv.cc +++ b/src/routines/xgemv.cc @@ -100,7 +100,7 @@ StatusCode Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose, // Retrieves the Xgemv kernel from the compiled binary try { - auto program = GetProgramFromCache(); + auto& program = GetProgramFromCache(); auto kernel = Kernel(program, kernel_name); // Sets the kernel arguments diff --git a/src/routines/xsymm.cc b/src/routines/xsymm.cc index aa43593d..97f35be8 100644 --- a/src/routines/xsymm.cc +++ b/src/routines/xsymm.cc @@ -61,7 +61,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm // routine afterwards try { - auto program = GetProgramFromCache(); + auto& program = GetProgramFromCache(); auto kernel = Kernel(program, kernel_name); // Sets the arguments for the symmetric-to-squared kernel |