diff options
Diffstat (limited to 'src/routines/level1/xaxpy.cpp')
-rw-r--r-- | src/routines/level1/xaxpy.cpp | 86 |
1 files changed, 39 insertions, 47 deletions
diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp index 3445e2b5..cbcbb3cd 100644 --- a/src/routines/level1/xaxpy.cpp +++ b/src/routines/level1/xaxpy.cpp @@ -33,18 +33,16 @@ Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name): // The main routine template <typename T> -StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, - const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, - const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) { +void Xaxpy<T>::DoAxpy(const size_t n, const T alpha, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc, + const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) { // Makes sure all dimensions are larger than zero - if (n == 0) { return StatusCode::kInvalidDimension; } + if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc); - if (ErrorIn(status)) { return status; } + TestVectorX(n, x_buffer, x_offset, x_inc); + TestVectorY(n, y_buffer, y_offset, y_inc); // Determines whether or not the fast-version can be used bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) && @@ -55,45 +53,39 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy"; // Retrieves the Xaxpy kernel from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel = Kernel(program, kernel_name); - - // Sets the kernel arguments - if (use_fast_kernel) { - kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, GetRealArg(alpha)); - kernel.SetArgument(2, x_buffer()); - kernel.SetArgument(3, y_buffer()); - } - else { - kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, GetRealArg(alpha)); - kernel.SetArgument(2, x_buffer()); - kernel.SetArgument(3, static_cast<int>(x_offset)); - kernel.SetArgument(4, static_cast<int>(x_inc)); - kernel.SetArgument(5, y_buffer()); - kernel.SetArgument(6, static_cast<int>(y_offset)); - kernel.SetArgument(7, static_cast<int>(y_inc)); - } - - // Launches the kernel - if (use_fast_kernel) { - auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; - auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - } - else { - auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); - auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; - auto local = std::vector<size_t>{db_["WGS"]}; - status = RunKernel(kernel, queue_, device_, global, local, event_); - } - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); + auto kernel = Kernel(program, kernel_name); + + // Sets the kernel arguments + if (use_fast_kernel) { + kernel.SetArgument(0, static_cast<int>(n)); + kernel.SetArgument(1, GetRealArg(alpha)); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, y_buffer()); + } + else { + kernel.SetArgument(0, static_cast<int>(n)); + kernel.SetArgument(1, GetRealArg(alpha)); + kernel.SetArgument(2, x_buffer()); + kernel.SetArgument(3, static_cast<int>(x_offset)); + kernel.SetArgument(4, static_cast<int>(x_inc)); + kernel.SetArgument(5, y_buffer()); + kernel.SetArgument(6, static_cast<int>(y_offset)); + kernel.SetArgument(7, static_cast<int>(y_inc)); + } + + // Launches the kernel + if (use_fast_kernel) { + auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])}; + auto local = std::vector<size_t>{db_["WGS"]}; + RunKernel(kernel, queue_, device_, global, local, event_); + } + else { + auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]); + auto global = std::vector<size_t>{n_ceiled/db_["WPT"]}; + auto local = std::vector<size_t>{db_["WGS"]}; + RunKernel(kernel, queue_, device_, global, local, event_); + } } // ================================================================================================= |