summary | refs | log | tree | commit | diff
path: root/src/clblast.cc
diff options
context:
space:
mode:
author CNugteren <web@cedricnugteren.nl> 2015-08-22 17:11:20 +0200
committer CNugteren <web@cedricnugteren.nl> 2015-08-22 17:11:20 +0200
commit ff0c54c3865b45eff807315262e73d3f01cb19c3 (patch)
tree 839e9def73fb068f988b07e1e879ecce48d884c8 /src/clblast.cc
parent 75517353d505de1d3979866060261a666aebfd36 (diff)
Added the XSWAP, XSCAL and XCOPY level-1 routines
Diffstat (limited to 'src/clblast.cc')
-rw-r--r-- src/clblast.cc 129
1 files changed, 115 insertions, 14 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index 12c7b880..c99ad7b1 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -18,6 +18,9 @@
#include "clblast.h"
// BLAS level-1 includes
+#include "internal/routines/level1/xswap.h"
+#include "internal/routines/level1/xscal.h"
+#include "internal/routines/level1/xcopy.h"
#include "internal/routines/level1/xaxpy.h"
// BLAS level-2 includes
@@ -40,41 +43,139 @@ namespace clblast {
// BLAS level-1 (vector-vector) routines
// =================================================================================================
+// SWAP: API entry point that wraps the raw OpenCL handles and forwards to Xswap<T>::DoSwap
+template <typename T>  // T is the element type; see the explicit instantiations below
+StatusCode Swap(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,  // x: raw buffer, offset, increment
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,  // y: raw buffer, offset, increment
+ cl_command_queue* queue, cl_event* event) {  // both pointers are dereferenced below
+ auto queue_cpp = Queue(*queue);  // dereferences queue: caller must pass a non-null pointer
+ auto event_cpp = Event(*event);  // dereferences event: caller must pass a non-null pointer
+ auto routine = Xswap<T>(queue_cpp, event_cpp);
+ auto status = routine.SetUp();  // NOTE(review): presumably prepares the device kernels - confirm
+ if (status != StatusCode::kSuccess) { return status; }  // set-up failure is returned unchanged
+ return routine.DoSwap(n,  // the status of DoSwap becomes the status of this call
+ Buffer<T>(x_buffer), x_offset, x_inc,  // raw cl_mem handles are wrapped as Buffer<T>
+ Buffer<T>(y_buffer), y_offset, y_inc);
+}
+template StatusCode Swap<float>(const size_t,  // explicit instantiation for T = float
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<double>(const size_t,  // explicit instantiation for T = double
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<float2>(const size_t,  // explicit instantiation for T = float2
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<double2>(const size_t,  // explicit instantiation for T = double2
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+
+// SCAL: API entry point that wraps the raw OpenCL handles and forwards to Xscal<T>::DoScal
+template <typename T>  // T is the element type; see the explicit instantiations below
+StatusCode Scal(const size_t n,
+ const T alpha,  // scalar of the element type, passed by value and forwarded as-is
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,  // x: raw buffer, offset, increment
+ cl_command_queue* queue, cl_event* event) {  // both pointers are dereferenced below
+ auto queue_cpp = Queue(*queue);  // dereferences queue: caller must pass a non-null pointer
+ auto event_cpp = Event(*event);  // dereferences event: caller must pass a non-null pointer
+ auto routine = Xscal<T>(queue_cpp, event_cpp);
+ auto status = routine.SetUp();  // NOTE(review): presumably prepares the device kernels - confirm
+ if (status != StatusCode::kSuccess) { return status; }  // set-up failure is returned unchanged
+ return routine.DoScal(n,  // the status of DoScal becomes the status of this call
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc);  // raw cl_mem handle is wrapped as Buffer<T>
+}
+template StatusCode Scal<float>(const size_t,  // explicit instantiation for T = float
+ const float,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<double>(const size_t,  // explicit instantiation for T = double
+ const double,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<float2>(const size_t,  // explicit instantiation for T = float2
+ const float2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<double2>(const size_t,  // explicit instantiation for T = double2
+ const double2,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+
+// COPY: API entry point that wraps the raw OpenCL handles and forwards to Xcopy<T>::DoCopy
+template <typename T>  // T is the element type; see the explicit instantiations below
+StatusCode Copy(const size_t n,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,  // x: declared const here
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,  // y: raw buffer, offset, increment
+ cl_command_queue* queue, cl_event* event) {  // both pointers are dereferenced below
+ auto queue_cpp = Queue(*queue);  // dereferences queue: caller must pass a non-null pointer
+ auto event_cpp = Event(*event);  // dereferences event: caller must pass a non-null pointer
+ auto routine = Xcopy<T>(queue_cpp, event_cpp);
+ auto status = routine.SetUp();  // NOTE(review): presumably prepares the device kernels - confirm
+ if (status != StatusCode::kSuccess) { return status; }  // set-up failure is returned unchanged
+ return routine.DoCopy(n,  // the status of DoCopy becomes the status of this call
+ Buffer<T>(x_buffer), x_offset, x_inc,  // raw cl_mem handles are wrapped as Buffer<T>
+ Buffer<T>(y_buffer), y_offset, y_inc);
+}
+template StatusCode Copy<float>(const size_t,  // explicit instantiation for T = float
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<double>(const size_t,  // explicit instantiation for T = double
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<float2>(const size_t,  // explicit instantiation for T = float2
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<double2>(const size_t,  // explicit instantiation for T = double2
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue* queue, cl_event* event);
+
// AXPY: API entry point that wraps the raw OpenCL handles and forwards to Xaxpy<T>::DoAxpy
template <typename T>
-StatusCode Axpy(const size_t n, const T alpha,
+StatusCode Axpy(const size_t n,
+ const T alpha,  // scalar of the element type, passed by value and forwarded as-is
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
auto queue_cpp = Queue(*queue);  // dereferences queue: caller must pass a non-null pointer
auto event_cpp = Event(*event);  // dereferences event: caller must pass a non-null pointer
auto routine = Xaxpy<T>(queue_cpp, event_cpp);
-
- // Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }  // set-up failure is returned unchanged
-
- // Runs the routine
- return routine.DoAxpy(n, alpha,
+ return routine.DoAxpy(n,  // the status of DoAxpy becomes the status of this call
+ alpha,
Buffer<T>(x_buffer), x_offset, x_inc,  // raw cl_mem handles are wrapped as Buffer<T>
Buffer<T>(y_buffer), y_offset, y_inc);
}
-template StatusCode Axpy<float>(const size_t, const float,
+template StatusCode Axpy<float>(const size_t,  // explicit instantiation for T = float
+ const float,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*);
-template StatusCode Axpy<double>(const size_t, const double,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<double>(const size_t,  // explicit instantiation for T = double
+ const double,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*);
-template StatusCode Axpy<float2>(const size_t, const float2,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<float2>(const size_t,  // explicit instantiation for T = float2
+ const float2,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*);
-template StatusCode Axpy<double2>(const size_t, const double2,
+ cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<double2>(const size_t,  // explicit instantiation for T = double2
+ const double2,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
- cl_command_queue*, cl_event*);
+ cl_command_queue* queue, cl_event* event);
// =================================================================================================
// BLAS level-2 (matrix-vector) routines