diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-05-12 20:01:33 -0700 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-05-12 20:01:33 -0700 |
commit | f151e56daa617e3327826f06f0765d1673fa8cfd (patch) | |
tree | 66396978988720155adf4f6eb21b921758ccd8aa /src | |
parent | 86e8df60f1598760511b059b42a9e4f9dddfa150 (diff) |
Added the IxAMIN routines: absolute minimum version of IxAMAX
Diffstat (limited to 'src')
-rw-r--r-- | src/clblast.cpp | 37 | ||||
-rw-r--r-- | src/clblast_c.cpp | 67 | ||||
-rw-r--r-- | src/clblast_netlib_c.cpp | 90 | ||||
-rw-r--r-- | src/kernels/level1/xamax.opencl | 4 | ||||
-rw-r--r-- | src/routines/level1/xamin.hpp | 49 |
5 files changed, 246 insertions, 1 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp index 78548eba..1eb1ddd7 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -30,6 +30,7 @@ #include "routines/level1/xasum.hpp" #include "routines/level1/xsum.hpp" // non-BLAS routine #include "routines/level1/xamax.hpp" +#include "routines/level1/xamin.hpp" // non-BLAS routine #include "routines/level1/xmax.hpp" // non-BLAS routine #include "routines/level1/xmin.hpp" // non-BLAS routine @@ -550,6 +551,42 @@ template StatusCode PUBLIC_API Amax<half>(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +// Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN +template <typename T> +StatusCode Amin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + auto queue_cpp = Queue(*queue); + auto routine = Xamin<T>(queue_cpp, event); + routine.DoAmin(n, + Buffer<unsigned int>(imin_buffer), imin_offset, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } +} +template StatusCode PUBLIC_API Amin<float>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amin<double>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amin<float2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amin<double2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amin<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + // Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX template <typename T> StatusCode Max(const size_t n, diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp index b6a64749..d2656274 100644 --- a/src/clblast_c.cpp +++ b/src/clblast_c.cpp @@ -820,6 +820,73 @@ CLBlastStatusCode CLBlastiHamax(const size_t n, } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } +// AMIN +CLBlastStatusCode CLBlastiSamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + return static_cast<CLBlastStatusCode>( + clblast::Amin<float>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event) + ); + } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } +} +CLBlastStatusCode CLBlastiDamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + return static_cast<CLBlastStatusCode>( + clblast::Amin<double>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event) + ); + } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } +} +CLBlastStatusCode CLBlastiCamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + return static_cast<CLBlastStatusCode>( + clblast::Amin<float2>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event) + ); + } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } +} +CLBlastStatusCode CLBlastiZamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + return static_cast<CLBlastStatusCode>( + clblast::Amin<double2>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event) + ); + } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } +} +CLBlastStatusCode CLBlastiHamin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + return static_cast<CLBlastStatusCode>( + clblast::Amin<half>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event) + ); + } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } +} + // MAX CLBlastStatusCode CLBlastiSmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, diff --git a/src/clblast_netlib_c.cpp b/src/clblast_netlib_c.cpp index 3fbabd43..d3b9b5e6 100644 --- a/src/clblast_netlib_c.cpp +++ b/src/clblast_netlib_c.cpp @@ -1191,6 +1191,96 @@ int cblas_izamax(const int n, return imax[0]; } +// AMIN +int cblas_isamin(const int n, + const float* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n * x_inc; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer<float>(context, x_size); + auto imin_buffer = clblast::Buffer<int>(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast<const float*>(x)); + auto queue_cl = queue(); + auto s = clblast::Amin<float>(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + int imin[imin_size]; + imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); + return imin[0]; +} +int cblas_idamin(const int n, + const double* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n * x_inc; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer<double>(context, x_size); + auto imin_buffer = clblast::Buffer<int>(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast<const double*>(x)); + auto queue_cl = queue(); + auto s = clblast::Amin<double>(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + int imin[imin_size]; + imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); + return imin[0]; +} +int cblas_icamin(const int n, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n * x_inc; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer<float2>(context, x_size); + auto imin_buffer = clblast::Buffer<int>(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast<const float2*>(x)); + auto queue_cl = queue(); + auto s = clblast::Amin<float2>(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + int imin[imin_size]; + imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); + return imin[0]; +} +int cblas_izamin(const int n, + const void* x, const int x_inc) { + auto device = get_device(); + auto context = clblast::Context(device); + auto queue = clblast::Queue(context, device); + const auto x_size = n * x_inc; + const auto imin_size = 1; + auto x_buffer = clblast::Buffer<double2>(context, x_size); + auto imin_buffer = clblast::Buffer<int>(context, imin_size); + x_buffer.Write(queue, x_size, reinterpret_cast<const double2*>(x)); + auto queue_cl = queue(); + auto s = clblast::Amin<double2>(n, + imin_buffer(), 0, + x_buffer(), 0, x_inc, + &queue_cl); + if (s != clblast::StatusCode::kSuccess) { + throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); + } + int imin[imin_size]; + imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); + return imin[0]; +} + // MAX int cblas_ismax(const int n, const float* x, const int x_inc) { diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl index 48ad2e75..2bd2f714 100644 --- a/src/kernels/level1/xamax.opencl +++ b/src/kernels/level1/xamax.opencl @@ -7,7 +7,7 @@ // Author(s): // Cedric Nugteren <www.cedricnugteren.nl> // -// This file contains the Xamax kernel. It implements an index of absolute max computation using +// This file contains the Xamax kernel. It implements index of (absolute) min/max computation using // reduction kernels. Reduction is split in two parts. In the first (main) kernel the X vector is // loaded, followed by a per-thread and a per-workgroup reduction. The second (epilogue) kernel // is executed with a single workgroup only, computing the final result. @@ -59,6 +59,8 @@ void Xamax(const int n, // nothing special here #elif defined(ROUTINE_MIN) // non-absolute minimum version x = -x; + #elif defined(ROUTINE_AMIN) // absolute minimum version + x = -fabs(x); #else x = fabs(x); #endif diff --git a/src/routines/level1/xamin.hpp b/src/routines/level1/xamin.hpp new file mode 100644 index 00000000..6622e220 --- /dev/null +++ b/src/routines/level1/xamin.hpp @@ -0,0 +1,49 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xamin routine. The precision is implemented using a template argument. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XAMIN_H_ +#define CLBLAST_ROUTINES_XAMIN_H_ + +#include "routine.hpp" +#include "routines/level1/xamax.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xamin: public Xamax<T> { + public: + + // Members and methods from the base class + using Xamax<T>::DoAmax; + + // Constructor + Xamin(Queue &queue, EventPointer event, const std::string &name = "AMIN"): + Xamax<T>(queue, event, name) { + } + + // Forwards to the regular max-absolute version. The implementation difference is realised in the + // kernel through a pre-processor macro based on the name of the routine. + void DoAmin(const size_t n, + const Buffer<unsigned int> &imin_buffer, const size_t imin_offset, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { + DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XAMIN_H_ +#endif |