summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/clblast.cc 33
-rw-r--r-- src/clblast_c.cc 42
-rw-r--r-- src/kernels/level1/xamax.opencl 7
3 files changed, 80 insertions, 2 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index a5bb6b67..4d7c9986 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -32,6 +32,7 @@
#include "internal/routines/level1/xsum.h" // non-BLAS function
#include "internal/routines/level1/xamax.h"
#include "internal/routines/level1/xmax.h" // non-BLAS function
+#include "internal/routines/level1/xmin.h" // non-BLAS function
// BLAS level-2 includes
#include "internal/routines/level2/xgemv.h"
@@ -525,6 +526,37 @@ template StatusCode PUBLIC_API Max<double2>(const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN
+template <typename T>
+StatusCode Min(const size_t n,
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto queue_cpp = Queue(*queue); // wrap the caller's raw command queue in a Queue object
+ auto routine = Xmin<T>(queue_cpp, event); // routine object constructed from the wrapped queue and the event pointer
+ auto status = routine.SetUp();
+ if (status != StatusCode::kSuccess) { return status; } // a failed set-up status is returned as-is; DoMin is not called
+ return routine.DoMin(n,
+ Buffer<T>(imin_buffer), imin_offset, // NOTE(review): result buffer is wrapped as Buffer<T> - confirm this is the element type DoMin expects
+ Buffer<T>(x_buffer), x_offset, x_inc);
+}
+template StatusCode PUBLIC_API Min<float>(const size_t, // explicit instantiation of Min for T = float
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Min<double>(const size_t, // explicit instantiation of Min for T = double
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Min<float2>(const size_t, // explicit instantiation of Min for T = float2
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Min<double2>(const size_t, // explicit instantiation of Min for T = double2
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
@@ -1880,6 +1912,7 @@ StatusCode FillCache(const cl_device_id device) {
Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp();
Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp();
Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp();
+ Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp();
// Runs all the level 2 set-up functions
Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp();
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 47ab1798..1fc63de2 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -601,6 +601,48 @@ StatusCode CLBlastiZmax(const size_t n,
return static_cast<StatusCode>(status);
}
+// MIN
+StatusCode CLBlastiSmin(const size_t n, // wrapper around clblast::Min<float>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Min<float>(n, // every argument is forwarded unchanged and in the same order
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the result of the C++ call to this function's StatusCode return type
+}
+StatusCode CLBlastiDmin(const size_t n, // wrapper around clblast::Min<double>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Min<double>(n, // every argument is forwarded unchanged and in the same order
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the result of the C++ call to this function's StatusCode return type
+}
+StatusCode CLBlastiCmin(const size_t n, // wrapper around clblast::Min<float2>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Min<float2>(n, // every argument is forwarded unchanged and in the same order
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the result of the C++ call to this function's StatusCode return type
+}
+StatusCode CLBlastiZmin(const size_t n, // wrapper around clblast::Min<double2>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Min<double2>(n, // every argument is forwarded unchanged and in the same order
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the result of the C++ call to this function's StatusCode return type
+}
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 58b75ce2..48d0eb5c 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -41,7 +41,7 @@ __kernel void Xamax(const int n,
const int num_groups = get_num_groups(0);
// Performs loading and the first steps of the reduction
- #if defined(ROUTINE_MAX) // non-absolute version
+ #if defined(ROUTINE_MAX) || defined(ROUTINE_MIN) // non-absolute version
singlereal max = SMALLEST;
#else
singlereal max = ZERO;
@@ -55,7 +55,10 @@ __kernel void Xamax(const int n,
#else
singlereal x = xgm[x_index];
#endif
- #if defined(ROUTINE_MAX) // non-absolute version
+ #if defined(ROUTINE_MAX) // non-absolute maximum version
+ // nothing special here
+ #elif defined(ROUTINE_MIN) // non-absolute minimum version
+ x = -x;
#else
x = fabs(x);
#endif