diff options
Diffstat (limited to 'src/routines/level1/xamax.cpp')
-rw-r--r-- | src/routines/level1/xamax.cpp | 100 |
1 files changed, 45 insertions, 55 deletions
diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp index 6b6e7f9e..e9efa1a7 100644 --- a/src/routines/level1/xamax.cpp +++ b/src/routines/level1/xamax.cpp @@ -22,74 +22,64 @@ namespace clblast { // Constructor: forwards to base class constructor template <typename T> Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) { - source_string_ = + Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, { #include "../../kernels/level1/xamax.opencl" - ; + }) { } // ================================================================================================= // The main routine template <typename T> -StatusCode Xamax<T>::DoAmax(const size_t n, - const Buffer<unsigned int> &imax_buffer, const size_t imax_offset, - const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { +void Xamax<T>::DoAmax(const size_t n, + const Buffer<unsigned int> &imax_buffer, const size_t imax_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { // Makes sure all dimensions are larger than zero - if (n == 0) { return StatusCode::kInvalidDimension; } + if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc); - if (ErrorIn(status)) { return status; } - status = TestVectorIndex(1, imax_buffer, imax_offset); - if (ErrorIn(status)) { return status; } + TestVectorX(n, x_buffer, x_offset, x_inc); + TestVectorIndex(1, imax_buffer, imax_offset); // Retrieves the Xamax kernels from the compiled binary - try { - const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); - auto kernel1 = Kernel(program, "Xamax"); - auto kernel2 = Kernel(program, "XamaxEpilogue"); - - // Creates the buffer for intermediate values - auto temp_size = 2*db_["WGS2"]; - auto temp_buffer1 = Buffer<T>(context_, temp_size); - auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size); - - // Sets the kernel arguments - kernel1.SetArgument(0, static_cast<int>(n)); - kernel1.SetArgument(1, x_buffer()); - kernel1.SetArgument(2, static_cast<int>(x_offset)); - kernel1.SetArgument(3, static_cast<int>(x_inc)); - kernel1.SetArgument(4, temp_buffer1()); - kernel1.SetArgument(5, temp_buffer2()); - - // Event waiting list - auto eventWaitList = std::vector<Event>(); - - // Launches the main kernel - auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size}; - auto local1 = std::vector<size_t>{db_["WGS1"]}; - auto kernelEvent = Event(); - status = RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer()); - if (ErrorIn(status)) { return status; } - eventWaitList.push_back(kernelEvent); - - // Sets the arguments for the epilogue kernel - kernel2.SetArgument(0, temp_buffer1()); - kernel2.SetArgument(1, temp_buffer2()); - kernel2.SetArgument(2, imax_buffer()); - kernel2.SetArgument(3, static_cast<int>(imax_offset)); - - // Launches the epilogue kernel - auto global2 = std::vector<size_t>{db_["WGS2"]}; - auto local2 = std::vector<size_t>{db_["WGS2"]}; - status = RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList); - if (ErrorIn(status)) { return status; } - - // Succesfully finished the computation - return StatusCode::kSuccess; - } catch (...) { return StatusCode::kInvalidKernel; } + const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); + auto kernel1 = Kernel(program, "Xamax"); + auto kernel2 = Kernel(program, "XamaxEpilogue"); + + // Creates the buffer for intermediate values + auto temp_size = 2*db_["WGS2"]; + auto temp_buffer1 = Buffer<T>(context_, temp_size); + auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size); + + // Sets the kernel arguments + kernel1.SetArgument(0, static_cast<int>(n)); + kernel1.SetArgument(1, x_buffer()); + kernel1.SetArgument(2, static_cast<int>(x_offset)); + kernel1.SetArgument(3, static_cast<int>(x_inc)); + kernel1.SetArgument(4, temp_buffer1()); + kernel1.SetArgument(5, temp_buffer2()); + + // Event waiting list + auto eventWaitList = std::vector<Event>(); + + // Launches the main kernel + auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size}; + auto local1 = std::vector<size_t>{db_["WGS1"]}; + auto kernelEvent = Event(); + RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer()); + eventWaitList.push_back(kernelEvent); + + // Sets the arguments for the epilogue kernel + kernel2.SetArgument(0, temp_buffer1()); + kernel2.SetArgument(1, temp_buffer2()); + kernel2.SetArgument(2, imax_buffer()); + kernel2.SetArgument(3, static_cast<int>(imax_offset)); + + // Launches the epilogue kernel + auto global2 = std::vector<size_t>{db_["WGS2"]}; + auto local2 = std::vector<size_t>{db_["WGS2"]}; + RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList); } // ================================================================================================= |