From e113ff0852d21ecb898b3b192145b70cad3f338a Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 30 Apr 2016 09:49:39 +0200 Subject: Added non-absolute minimum counter-part IxMIN of the BLAS routine IxAMAX --- src/clblast.cc | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'src/clblast.cc') diff --git a/src/clblast.cc b/src/clblast.cc index a5bb6b67..4d7c9986 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -32,6 +32,7 @@ #include "internal/routines/level1/xsum.h" // non-BLAS function #include "internal/routines/level1/xamax.h" #include "internal/routines/level1/xmax.h" // non-BLAS function +#include "internal/routines/level1/xmin.h" // non-BLAS function // BLAS level-2 includes #include "internal/routines/level2/xgemv.h" @@ -525,6 +526,37 @@ template StatusCode PUBLIC_API Max(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN +template +StatusCode Min(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto queue_cpp = Queue(*queue); + auto routine = Xmin(queue_cpp, event); + auto status = routine.SetUp(); + if (status != StatusCode::kSuccess) { return status; } + return routine.DoMin(n, + Buffer(imin_buffer), imin_offset, + Buffer(x_buffer), x_offset, x_inc); +} +template StatusCode PUBLIC_API Min(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min(const size_t, + cl_mem, const 
size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= @@ -1880,6 +1912,7 @@ StatusCode FillCache(const cl_device_id device) { Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); Xsum(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xamax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); Xmax(queue, nullptr).SetUp(); + Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); Xmin(queue, nullptr).SetUp(); // Runs all the level 2 set-up functions Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); Xgemv(queue, nullptr).SetUp(); -- cgit v1.2.3