From ff0c54c3865b45eff807315262e73d3f01cb19c3 Mon Sep 17 00:00:00 2001
From: CNugteren
Date: Sat, 22 Aug 2015 17:11:20 +0200
Subject: Added the XSWAP, XSCAL and XCOPY level-1 routines

---
Summary of the change (derived from the hunks below):
  * Includes the xswap.h, xscal.h and xcopy.h level-1 routine headers.
  * Adds the templated API entry points Swap, Scal and Copy. Each one wraps
    *queue and *event in Queue/Event objects, constructs the matching
    Xswap/Xscal/Xcopy routine, calls SetUp() and returns its status when that
    is not StatusCode::kSuccess, and otherwise returns the result of
    DoSwap/DoScal/DoCopy.
  * Explicitly instantiates each new entry point for float, double, float2 and
    double2.
  * Reformats Axpy to one scalar argument per line, drops its two inline
    comments, and names the queue/event parameters in its instantiations.

NOTE(review): this copy of the patch was recovered from a rendering that had
collapsed all line breaks and dropped every angle-bracket token. Line breaks,
indentation and the template argument lists (<typename T>, Xswap<T>,
Swap<float>, ...) were restored from context; the hunk line counts match the
@@ headers and the diffstat, but whitespace should be confirmed against
upstream commit ff0c54c3 before applying.
NOTE(review): the entry points dereference `queue` and `event` without a null
check -- confirm that callers always pass valid pointers.

 src/clblast.cc | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 14 deletions(-)

(limited to 'src/clblast.cc')

diff --git a/src/clblast.cc b/src/clblast.cc
index 12c7b880..c99ad7b1 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -18,6 +18,9 @@
 #include "clblast.h"
 
 // BLAS level-1 includes
+#include "internal/routines/level1/xswap.h"
+#include "internal/routines/level1/xscal.h"
+#include "internal/routines/level1/xcopy.h"
 #include "internal/routines/level1/xaxpy.h"
 
 // BLAS level-2 includes
@@ -40,41 +43,139 @@ namespace clblast {
 // BLAS level-1 (vector-vector) routines
 // =================================================================================================
 
+// SWAP
+template <typename T>
+StatusCode Swap(const size_t n,
+                cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+                cl_command_queue* queue, cl_event* event) {
+  auto queue_cpp = Queue(*queue);
+  auto event_cpp = Event(*event);
+  auto routine = Xswap<T>(queue_cpp, event_cpp);
+  auto status = routine.SetUp();
+  if (status != StatusCode::kSuccess) { return status; }
+  return routine.DoSwap(n,
+                        Buffer(x_buffer), x_offset, x_inc,
+                        Buffer(y_buffer), y_offset, y_inc);
+}
+template StatusCode Swap<float>(const size_t,
+                                cl_mem, const size_t, const size_t,
+                                cl_mem, const size_t, const size_t,
+                                cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<double>(const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<float2>(const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Swap<double2>(const size_t,
+                                  cl_mem, const size_t, const size_t,
+                                  cl_mem, const size_t, const size_t,
+                                  cl_command_queue* queue, cl_event* event);
+
+// SCAL
+template <typename T>
+StatusCode Scal(const size_t n,
+                const T alpha,
+                cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                cl_command_queue* queue, cl_event* event) {
+  auto queue_cpp = Queue(*queue);
+  auto event_cpp = Event(*event);
+  auto routine = Xscal<T>(queue_cpp, event_cpp);
+  auto status = routine.SetUp();
+  if (status != StatusCode::kSuccess) { return status; }
+  return routine.DoScal(n,
+                        alpha,
+                        Buffer(x_buffer), x_offset, x_inc);
+}
+template StatusCode Scal<float>(const size_t,
+                                const float,
+                                cl_mem, const size_t, const size_t,
+                                cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<double>(const size_t,
+                                 const double,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<float2>(const size_t,
+                                 const float2,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Scal<double2>(const size_t,
+                                  const double2,
+                                  cl_mem, const size_t, const size_t,
+                                  cl_command_queue* queue, cl_event* event);
+
+// COPY
+template <typename T>
+StatusCode Copy(const size_t n,
+                const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+                cl_command_queue* queue, cl_event* event) {
+  auto queue_cpp = Queue(*queue);
+  auto event_cpp = Event(*event);
+  auto routine = Xcopy<T>(queue_cpp, event_cpp);
+  auto status = routine.SetUp();
+  if (status != StatusCode::kSuccess) { return status; }
+  return routine.DoCopy(n,
+                        Buffer(x_buffer), x_offset, x_inc,
+                        Buffer(y_buffer), y_offset, y_inc);
+}
+template StatusCode Copy<float>(const size_t,
+                                const cl_mem, const size_t, const size_t,
+                                cl_mem, const size_t, const size_t,
+                                cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<double>(const size_t,
+                                 const cl_mem, const size_t, const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<float2>(const size_t,
+                                 const cl_mem, const size_t, const size_t,
+                                 cl_mem, const size_t, const size_t,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Copy<double2>(const size_t,
+                                  const cl_mem, const size_t, const size_t,
+                                  cl_mem, const size_t, const size_t,
+                                  cl_command_queue* queue, cl_event* event);
+
 // AXPY
 template <typename T>
-StatusCode Axpy(const size_t n, const T alpha,
+StatusCode Axpy(const size_t n,
+                const T alpha,
                 const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                 cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                 cl_command_queue* queue, cl_event* event) {
   auto queue_cpp = Queue(*queue);
   auto event_cpp = Event(*event);
   auto routine = Xaxpy<T>(queue_cpp, event_cpp);
-
-  // Compiles the routine's device kernels
   auto status = routine.SetUp();
   if (status != StatusCode::kSuccess) { return status; }
-
-  // Runs the routine
-  return routine.DoAxpy(n, alpha,
+  return routine.DoAxpy(n,
+                        alpha,
                         Buffer(x_buffer), x_offset, x_inc,
                         Buffer(y_buffer), y_offset, y_inc);
 }
-template StatusCode Axpy<float>(const size_t, const float,
+template StatusCode Axpy<float>(const size_t,
+                                const float,
                                 const cl_mem, const size_t, const size_t,
                                 cl_mem, const size_t, const size_t,
-                                cl_command_queue*, cl_event*);
-template StatusCode Axpy<double>(const size_t, const double,
+                                cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<double>(const size_t,
+                                 const double,
                                  const cl_mem, const size_t, const size_t,
                                  cl_mem, const size_t, const size_t,
-                                 cl_command_queue*, cl_event*);
-template StatusCode Axpy<float2>(const size_t, const float2,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<float2>(const size_t,
+                                 const float2,
                                  const cl_mem, const size_t, const size_t,
                                  cl_mem, const size_t, const size_t,
-                                 cl_command_queue*, cl_event*);
-template StatusCode Axpy<double2>(const size_t, const double2,
+                                 cl_command_queue* queue, cl_event* event);
+template StatusCode Axpy<double2>(const size_t,
+                                  const double2,
                                   const cl_mem, const size_t, const size_t,
                                   cl_mem, const size_t, const size_t,
-                                  cl_command_queue*, cl_event*);
+                                  cl_command_queue* queue, cl_event* event);
 
 // =================================================================================================
 // BLAS level-2 (matrix-vector) routines
-- 
cgit v1.2.3