diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/clblast.cc | 33 | ||||
-rw-r--r-- | src/clblast_c.cc | 42 | ||||
-rw-r--r-- | src/kernels/level1/xamax.opencl | 7 |
3 files changed, 80 insertions, 2 deletions
diff --git a/src/clblast.cc b/src/clblast.cc index a5bb6b67..4d7c9986 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -32,6 +32,7 @@ #include "internal/routines/level1/xsum.h" // non-BLAS function #include "internal/routines/level1/xamax.h" #include "internal/routines/level1/xmax.h" // non-BLAS function +#include "internal/routines/level1/xmin.h" // non-BLAS function // BLAS level-2 includes #include "internal/routines/level2/xgemv.h" @@ -525,6 +526,37 @@ template StatusCode PUBLIC_API Max<double2>(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN +template <typename T> +StatusCode Min(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto routine = Xmin<T>(queue_cpp, event); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoMin(n, + Buffer<T>(imin_buffer), imin_offset, + Buffer<T>(x_buffer), x_offset, x_inc); +} +template StatusCode PUBLIC_API Min<float>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min<double>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min<float2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min<double2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= @@ -1880,6 +1912,7 @@ StatusCode FillCache(const cl_device_id device) { Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp(); Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp(); Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp(); + Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp(); // Runs all the level 2 set-up functions Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp(); diff --git a/src/clblast_c.cc b/src/clblast_c.cc index 47ab1798..1fc63de2 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -601,6 +601,48 @@ StatusCode CLBlastiZmax(const size_t n, return static_cast<StatusCode>(status); } +// MIN +StatusCode CLBlastiSmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Min<float>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiDmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Min<double>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiCmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Min<float2>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiZmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Min<double2>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl index 58b75ce2..48d0eb5c 100644 --- a/src/kernels/level1/xamax.opencl +++ b/src/kernels/level1/xamax.opencl @@ -41,7 +41,7 @@ __kernel void Xamax(const int n, const int num_groups = get_num_groups(0); // Performs loading and the first steps of the reduction - #if defined(ROUTINE_MAX) // non-absolute version + #if defined(ROUTINE_MAX) || defined(ROUTINE_MIN) // non-absolute version singlereal max = SMALLEST; #else singlereal max = ZERO; @@ -55,7 +55,10 @@ __kernel void Xamax(const int n, #else singlereal x = xgm[x_index]; #endif - #if defined(ROUTINE_MAX) // non-absolute version + #if defined(ROUTINE_MAX) // non-absolute maximum version + // nothing special here + #elif defined(ROUTINE_MIN) // non-absolute minimum version + x = -x; #else x = fabs(x); #endif |