diff options
Diffstat (limited to 'src/routines/level2/xgemv.cpp')
-rw-r--r-- | src/routines/level2/xgemv.cpp | 127 |
1 files changed, 58 insertions, 69 deletions
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 4e32ba41..7b4c2e8f 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -22,52 +22,51 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" - ; + }) { } // ================================================================================================= // The main routine template <typename T> -StatusCode Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose, - const size_t m, const size_t n, - const T alpha, - const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) { +void Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) { // Performs the matrix-vector multiplication - return MatVec(layout, a_transpose, - m, n, alpha, - a_buffer, a_offset, a_ld, - x_buffer, x_offset, x_inc, beta, - y_buffer, y_offset, y_inc, - true, true, - 0, false, 0, 0); // N/A for this routine + MatVec(layout, a_transpose, + m, n, alpha, + a_buffer, a_offset, a_ld, + x_buffer, x_offset, x_inc, beta, + y_buffer, y_offset, y_inc, + true, true, + 0, false, 0, 0); // N/A for this routine } // ================================================================================================= // The generic implementation, also suited for other (non general) matrix-vector multiplications template <typename T> -StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, - const size_t m, const size_t n, - const T alpha, - const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, - const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, - const T beta, - const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, - bool fast_kernel, bool fast_kernel_rot, - const size_t parameter, const bool packed, - const size_t kl, const size_t ku) { +void Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, + const size_t m, const size_t n, + const T alpha, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const T beta, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc, + bool fast_kernel, bool fast_kernel_rot, + const size_t parameter, const bool packed, + const size_t kl, const size_t ku) { // Makes sure all dimensions are larger than zero - if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; } + if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); } // Computes whether or not the matrix has an alternative layout (row or column-major). auto a_altlayout = (layout == Layout::kRowMajor); @@ -91,14 +90,10 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, auto a_conjugate = (a_transpose == Transpose::kConjugate); // Tests the matrix and the vectors for validity - auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } - else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } - if (ErrorIn(status)) { return status; } - status = TestVectorX(n_real, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(m_real, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } + if (packed) { TestMatrixAP(n, a_buffer, a_offset); } + else { TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } + TestVectorX(n_real, x_buffer, x_offset, x_inc); + TestVectorY(m_real, y_buffer, y_offset, y_inc); // Determines whether or not the fast-version can be used fast_kernel = fast_kernel && (a_offset == 0) && (a_rotated == 0) && (a_conjugate == 0) && @@ -127,39 +122,33 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, } // Retrieves the Xgemv kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); - - // Sets the kernel arguments - kernel.SetArgument(0, static_cast<int>(m_real)); - kernel.SetArgument(1, static_cast<int>(n_real)); - kernel.SetArgument(2, GetRealArg(alpha)); - kernel.SetArgument(3, GetRealArg(beta)); - kernel.SetArgument(4, static_cast<int>(a_rotated)); - kernel.SetArgument(5, a_buffer()); - kernel.SetArgument(6, static_cast<int>(a_offset)); - kernel.SetArgument(7, static_cast<int>(a_ld)); - kernel.SetArgument(8, x_buffer()); - kernel.SetArgument(9, static_cast<int>(x_offset)); - kernel.SetArgument(10, static_cast<int>(x_inc)); - kernel.SetArgument(11, y_buffer()); - kernel.SetArgument(12, static_cast<int>(y_offset)); - kernel.SetArgument(13, static_cast<int>(y_inc)); - kernel.SetArgument(14, static_cast<int>(a_conjugate)); - kernel.SetArgument(15, static_cast<int>(parameter)); // extra parameter used for symm/herm - kernel.SetArgument(16, static_cast<int>(kl)); // only used for banded matrices - kernel.SetArgument(17, static_cast<int>(ku)); // only used for banded matrices - - // Launches the kernel - auto global = std::vector<size_t>{global_size}; - auto local = std::vector<size_t>{local_size}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); + auto kernel = Kernel(program, kernel_name); + + // Sets the kernel arguments + kernel.SetArgument(0, static_cast<int>(m_real)); + kernel.SetArgument(1, static_cast<int>(n_real)); + kernel.SetArgument(2, GetRealArg(alpha)); + kernel.SetArgument(3, GetRealArg(beta)); + kernel.SetArgument(4, static_cast<int>(a_rotated)); + kernel.SetArgument(5, a_buffer()); + kernel.SetArgument(6, static_cast<int>(a_offset)); + kernel.SetArgument(7, static_cast<int>(a_ld)); + kernel.SetArgument(8, x_buffer()); + kernel.SetArgument(9, static_cast<int>(x_offset)); + kernel.SetArgument(10, static_cast<int>(x_inc)); + kernel.SetArgument(11, y_buffer()); + kernel.SetArgument(12, static_cast<int>(y_offset)); + kernel.SetArgument(13, static_cast<int>(y_inc)); + kernel.SetArgument(14, static_cast<int>(a_conjugate)); + kernel.SetArgument(15, static_cast<int>(parameter)); // extra parameter used for symm/herm + kernel.SetArgument(16, static_cast<int>(kl)); // only used for banded matrices + kernel.SetArgument(17, static_cast<int>(ku)); // only used for banded matrices + + // Launches the kernel + auto global = std::vector<size_t>{global_size}; + auto local = std::vector<size_t>{local_size}; + RunKernel(kernel, queue_, device_, global, local, event_); } // ================================================================================================= |