summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2017-05-12 20:01:33 -0700
committer: Cedric Nugteren <web@cedricnugteren.nl>, 2017-05-12 20:01:33 -0700
commit: f151e56daa617e3327826f06f0765d1673fa8cfd (patch)
tree: 66396978988720155adf4f6eb21b921758ccd8aa /src
parent: 86e8df60f1598760511b059b42a9e4f9dddfa150 (diff)
Added the IxAMIN routines: absolute minimum version of IxAMAX
Diffstat (limited to 'src')
-rw-r--r-- src/clblast.cpp 37
-rw-r--r-- src/clblast_c.cpp 67
-rw-r--r-- src/clblast_netlib_c.cpp 90
-rw-r--r-- src/kernels/level1/xamax.opencl 4
-rw-r--r-- src/routines/level1/xamin.hpp 49
5 files changed, 246 insertions, 1 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp
index 78548eba..1eb1ddd7 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -30,6 +30,7 @@
#include "routines/level1/xasum.hpp"
#include "routines/level1/xsum.hpp" // non-BLAS routine
#include "routines/level1/xamax.hpp"
+#include "routines/level1/xamin.hpp" // non-BLAS routine
#include "routines/level1/xmax.hpp" // non-BLAS routine
#include "routines/level1/xmin.hpp" // non-BLAS routine
@@ -550,6 +551,42 @@ template StatusCode PUBLIC_API Amax<half>(const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+// Index of absolute minimum value in a vector (non-BLAS function): iSAMIN/iDAMIN/iCAMIN/iZAMIN/iHAMIN
+template <typename T> // T is the element type of x_buffer (see Buffer<T> below)
+StatusCode Amin(const size_t n,
+ cl_mem imin_buffer, const size_t imin_offset, // result buffer, wrapped as Buffer<unsigned int> below
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // input vector, wrapped as Buffer<T>
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ auto queue_cpp = Queue(*queue); // queue is dereferenced unconditionally, so it must be non-null
+ auto routine = Xamin<T>(queue_cpp, event);
+ routine.DoAmin(n,
+ Buffer<unsigned int>(imin_buffer), imin_offset, // the imin buffer is treated as holding unsigned int
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess; // reached only when nothing above threw
+ } catch (...) { return DispatchException(); } // catch-all: the exception is turned into the returned status
+}
+template StatusCode PUBLIC_API Amin<float>(const size_t, // explicit instantiation of Amin for float
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Amin<double>(const size_t, // explicit instantiation of Amin for double
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Amin<float2>(const size_t, // explicit instantiation of Amin for float2
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Amin<double2>(const size_t, // explicit instantiation of Amin for double2
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Amin<half>(const size_t, // explicit instantiation of Amin for half
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
+
// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
template <typename T>
StatusCode Max(const size_t n,
diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp
index b6a64749..d2656274 100644
--- a/src/clblast_c.cpp
+++ b/src/clblast_c.cpp
@@ -820,6 +820,73 @@ CLBlastStatusCode CLBlastiHamax(const size_t n,
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
+// AMIN: C wrappers around clblast::Amin<T>, one function per element type
+CLBlastStatusCode CLBlastiSamin(const size_t n, // float variant: calls clblast::Amin<float>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ return static_cast<CLBlastStatusCode>( // C++ status converted to the C-API status type
+ clblast::Amin<float>(n, // every argument is forwarded unchanged
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } // catch-all: exception mapped to a status
+}
+CLBlastStatusCode CLBlastiDamin(const size_t n, // double variant: calls clblast::Amin<double>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ return static_cast<CLBlastStatusCode>( // C++ status converted to the C-API status type
+ clblast::Amin<double>(n, // every argument is forwarded unchanged
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } // catch-all: exception mapped to a status
+}
+CLBlastStatusCode CLBlastiCamin(const size_t n, // float2 variant: calls clblast::Amin<float2>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ return static_cast<CLBlastStatusCode>( // C++ status converted to the C-API status type
+ clblast::Amin<float2>(n, // every argument is forwarded unchanged
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } // catch-all: exception mapped to a status
+}
+CLBlastStatusCode CLBlastiZamin(const size_t n, // double2 variant: calls clblast::Amin<double2>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ return static_cast<CLBlastStatusCode>( // C++ status converted to the C-API status type
+ clblast::Amin<double2>(n, // every argument is forwarded unchanged
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } // catch-all: exception mapped to a status
+}
+CLBlastStatusCode CLBlastiHamin(const size_t n, // half variant: calls clblast::Amin<half>
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ return static_cast<CLBlastStatusCode>( // C++ status converted to the C-API status type
+ clblast::Amin<half>(n, // every argument is forwarded unchanged
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } // catch-all: exception mapped to a status
+}
+
// MAX
CLBlastStatusCode CLBlastiSmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
diff --git a/src/clblast_netlib_c.cpp b/src/clblast_netlib_c.cpp
index 3fbabd43..d3b9b5e6 100644
--- a/src/clblast_netlib_c.cpp
+++ b/src/clblast_netlib_c.cpp
@@ -1191,6 +1191,96 @@ int cblas_izamax(const int n,
return imax[0];
}
+// AMIN: host-pointer wrappers that stage x in a device buffer, call clblast::Amin and return the index
+int cblas_isamin(const int n, // float variant; returns the single int read back from imin_buffer
+ const float* x, const int x_inc) {
+ auto device = get_device();
+ auto context = clblast::Context(device); // a new context and queue are constructed on every call
+ auto queue = clblast::Queue(context, device);
+ const auto x_size = n * x_inc; // int product; NOTE(review): zero or negative n/x_inc is not guarded here
+ const auto imin_size = 1; // the result is a single index
+ auto x_buffer = clblast::Buffer<float>(context, x_size);
+ auto imin_buffer = clblast::Buffer<int>(context, imin_size);
+ x_buffer.Write(queue, x_size, reinterpret_cast<const float*>(x)); // write x_size elements from host x
+ auto queue_cl = queue(); // handle whose address is handed to Amin as its queue argument
+ auto s = clblast::Amin<float>(n,
+ imin_buffer(), 0,
+ x_buffer(), 0, x_inc,
+ &queue_cl); // no event argument is supplied
+ if (s != clblast::StatusCode::kSuccess) {
+ throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); // failure is reported by throwing
+ }
+ int imin[imin_size];
+ imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); // read the result into host memory
+ return imin[0];
+}
+int cblas_idamin(const int n, // double variant; returns the single int read back from imin_buffer
+ const double* x, const int x_inc) {
+ auto device = get_device();
+ auto context = clblast::Context(device); // a new context and queue are constructed on every call
+ auto queue = clblast::Queue(context, device);
+ const auto x_size = n * x_inc; // int product; NOTE(review): zero or negative n/x_inc is not guarded here
+ const auto imin_size = 1; // the result is a single index
+ auto x_buffer = clblast::Buffer<double>(context, x_size);
+ auto imin_buffer = clblast::Buffer<int>(context, imin_size);
+ x_buffer.Write(queue, x_size, reinterpret_cast<const double*>(x)); // write x_size elements from host x
+ auto queue_cl = queue(); // handle whose address is handed to Amin as its queue argument
+ auto s = clblast::Amin<double>(n,
+ imin_buffer(), 0,
+ x_buffer(), 0, x_inc,
+ &queue_cl); // no event argument is supplied
+ if (s != clblast::StatusCode::kSuccess) {
+ throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); // failure is reported by throwing
+ }
+ int imin[imin_size];
+ imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); // read the result into host memory
+ return imin[0];
+}
+int cblas_icamin(const int n, // float2 variant; returns the single int read back from imin_buffer
+ const void* x, const int x_inc) { // x is untyped here and reinterpreted as float2 data below
+ auto device = get_device();
+ auto context = clblast::Context(device); // a new context and queue are constructed on every call
+ auto queue = clblast::Queue(context, device);
+ const auto x_size = n * x_inc; // int product; NOTE(review): zero or negative n/x_inc is not guarded here
+ const auto imin_size = 1; // the result is a single index
+ auto x_buffer = clblast::Buffer<float2>(context, x_size);
+ auto imin_buffer = clblast::Buffer<int>(context, imin_size);
+ x_buffer.Write(queue, x_size, reinterpret_cast<const float2*>(x)); // write x_size elements from host x
+ auto queue_cl = queue(); // handle whose address is handed to Amin as its queue argument
+ auto s = clblast::Amin<float2>(n,
+ imin_buffer(), 0,
+ x_buffer(), 0, x_inc,
+ &queue_cl); // no event argument is supplied
+ if (s != clblast::StatusCode::kSuccess) {
+ throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); // failure is reported by throwing
+ }
+ int imin[imin_size];
+ imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); // read the result into host memory
+ return imin[0];
+}
+int cblas_izamin(const int n, // double2 variant; returns the single int read back from imin_buffer
+ const void* x, const int x_inc) { // x is untyped here and reinterpreted as double2 data below
+ auto device = get_device();
+ auto context = clblast::Context(device); // a new context and queue are constructed on every call
+ auto queue = clblast::Queue(context, device);
+ const auto x_size = n * x_inc; // int product; NOTE(review): zero or negative n/x_inc is not guarded here
+ const auto imin_size = 1; // the result is a single index
+ auto x_buffer = clblast::Buffer<double2>(context, x_size);
+ auto imin_buffer = clblast::Buffer<int>(context, imin_size);
+ x_buffer.Write(queue, x_size, reinterpret_cast<const double2*>(x)); // write x_size elements from host x
+ auto queue_cl = queue(); // handle whose address is handed to Amin as its queue argument
+ auto s = clblast::Amin<double2>(n,
+ imin_buffer(), 0,
+ x_buffer(), 0, x_inc,
+ &queue_cl); // no event argument is supplied
+ if (s != clblast::StatusCode::kSuccess) {
+ throw std::runtime_error("CLBlast returned with error code " + clblast::ToString(s)); // failure is reported by throwing
+ }
+ int imin[imin_size];
+ imin_buffer.Read(queue, imin_size, reinterpret_cast<int*>(imin)); // read the result into host memory
+ return imin[0];
+}
+
// MAX
int cblas_ismax(const int n,
const float* x, const int x_inc) {
diff --git a/src/kernels/level1/xamax.opencl b/src/kernels/level1/xamax.opencl
index 48ad2e75..2bd2f714 100644
--- a/src/kernels/level1/xamax.opencl
+++ b/src/kernels/level1/xamax.opencl
@@ -7,7 +7,7 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
-// This file contains the Xamax kernel. It implements an index of absolute max computation using
+// This file contains the Xamax kernel. It implements index of (absolute) min/max computation using
// reduction kernels. Reduction is split in two parts. In the first (main) kernel the X vector is
// loaded, followed by a per-thread and a per-workgroup reduction. The second (epilogue) kernel
// is executed with a single workgroup only, computing the final result.
@@ -59,6 +59,8 @@ void Xamax(const int n,
// nothing special here
#elif defined(ROUTINE_MIN) // non-absolute minimum version
x = -x;
+ #elif defined(ROUTINE_AMIN) // absolute minimum version
+ x = -fabs(x);
#else
x = fabs(x);
#endif
diff --git a/src/routines/level1/xamin.hpp b/src/routines/level1/xamin.hpp
new file mode 100644
index 00000000..6622e220
--- /dev/null
+++ b/src/routines/level1/xamin.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xamin routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XAMIN_H_
+#define CLBLAST_ROUTINES_XAMIN_H_
+
+#include "routine.hpp"
+#include "routines/level1/xamax.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Index-of-absolute-minimum routine: a thin subclass of Xamax that passes the routine name "AMIN"
+template <typename T>
+class Xamin: public Xamax<T> { // all work is delegated to the Xamax<T> base class
+ public:
+
+ // Members and methods from the base class
+ using Xamax<T>::DoAmax; // brings the dependent-base member into scope for the unqualified call in DoAmin
+
+ // Constructor
+ Xamin(Queue &queue, EventPointer event, const std::string &name = "AMIN"): // name defaults to "AMIN"
+ Xamax<T>(queue, event, name) { // queue, event and name are handed straight to the base constructor
+ }
+
+ // Forwards to the regular max-absolute version. The implementation difference is realised in the
+ // kernel through a pre-processor macro based on the name of the routine.
+ void DoAmin(const size_t n,
+ const Buffer<unsigned int> &imin_buffer, const size_t imin_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+ DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc); // arguments passed through unchanged
+ }
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XAMIN_H_
+#endif