diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-01-20 11:30:32 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-01-20 11:30:32 +0100 |
commit | a5fd2323b6d9ce793f12618951012fcfec257b95 (patch) | |
tree | ea6064784185f63cc0d324f287183ea6f1ca5c90 | |
parent | a2c0a9c5514e7cb9dbf9674843ba806b459d3544 (diff) |
Added prototype for the TRSV routine
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | doc/clblast.md | 57 | ||||
-rwxr-xr-x | scripts/generator/generator.py | 4 | ||||
-rw-r--r-- | src/clblast.cpp | 21 | ||||
-rw-r--r-- | src/routines/level2/xtrsv.cpp | 66 | ||||
-rw-r--r-- | src/routines/level2/xtrsv.hpp | 47 | ||||
-rw-r--r-- | test/routines/level2/xtrsv.hpp | 151 |
7 files changed, 339 insertions, 9 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a9cabac7..41982b21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,7 +156,7 @@ if(NETLIB) set(SAMPLE_PROGRAMS_C ${SAMPLE_PROGRAMS_C} sgemm_netlib) endif() set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) -set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv +set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv xger xgeru xgerc xher xhpr xher2 xhpr2 xsyr xspr xsyr2 xspr2) set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm) set(LEVELX_ROUTINES xomatcopy xinvert) diff --git a/doc/clblast.md b/doc/clblast.md index d7be0005..d90cb61b 100644 --- a/doc/clblast.md +++ b/doc/clblast.md @@ -1445,6 +1445,63 @@ Arguments to TPMV: +xTRSV: Solves a triangular system of equations +------------- + + + +C++ API: +``` +template <typename T> +StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) +``` + +C API: +``` +CLBlastStatusCode CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) +CLBlastStatusCode CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) +CLBlastStatusCode CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) +CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) +``` + +Arguments to TRSV: + +* `const Layout layout`: Data-layout of the matrices, either `Layout::kRowMajor` (101) for row-major layout or `Layout::kColMajor` (102) for column-major data-layout. +* `const Triangle triangle`: The part of the array of the triangular matrix to be used, either `Triangle::kUpper` (121) or `Triangle::kLower` (122). +* `const Transpose a_transpose`: Transposing the input matrix A, either `Transpose::kNo` (111), `Transpose::kYes` (112), or `Transpose::kConjugate` (113) for a complex-conjugate transpose. +* `const Diagonal diagonal`: The property of the diagonal matrix, either `Diagonal::kNonUnit` (131) for non-unit values on the diagonal or `Diagonal::kUnit` (132) for unit values on the diagonal. +* `const size_t n`: Integer size argument. This value must be positive. +* `const cl_mem a_buffer`: OpenCL buffer to store the input A matrix. +* `const size_t a_offset`: The offset in elements from the start of the input A matrix. +* `const size_t a_ld`: Leading dimension of the input A matrix. This value must be greater than 0. +* `cl_mem x_buffer`: OpenCL buffer to store the output x vector. +* `const size_t x_offset`: The offset in elements from the start of the output x vector. +* `const size_t x_inc`: Stride/increment of the output x vector. This value must be greater than 0. +* `cl_command_queue* queue`: Pointer to an OpenCL command queue associated with a context and device to execute the routine on. +* `cl_event* event`: Pointer to an OpenCL event to be able to wait for completion of the routine's OpenCL kernel(s). This is an optional argument. + + + xGER: General rank-1 matrix update ------------- diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index d71e392d..1bd0b58e 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -41,7 +41,7 @@ FILES = [ "/include/clblast_netlib_c.h", "/src/clblast_netlib_c.cpp", ] -HEADER_LINES = [117, 74, 118, 22, 29, 41, 65, 32] +HEADER_LINES = [117, 75, 118, 22, 29, 41, 65, 32] FOOTER_LINES = [17, 80, 19, 18, 6, 6, 9, 2] # Different possibilities for requirements @@ -129,7 +129,7 @@ ROUTINES = [ Routine(True, True, "2a", "trmv", T, [S,D,C,Z,H], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [an,xn], [], "n", "Triangular matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is triangular instead.", [ald_n]), Routine(True, True, "2a", "tbmv", T, [S,D,C,Z,H], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [an,xn], [], "n", "Triangular banded matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is triangular and banded instead.", [ald_k_one]), Routine(True, True, "2a", "tpmv", T, [S,D,C,Z,H], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [apn,xn], [], "n", "Triangular packed matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is a triangular packed matrix instead and repreented as _AP_.", []), - Routine(False, True, "2a", "trsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [an,xn], [], "", "Solves a triangular system of equations", "", []), + Routine(True, True, "2a", "trsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [an,xn], [], "", "Solves a triangular system of equations", "", []), Routine(False, True, "2a", "tbsv", T, [S,D,C,Z], ["n","k"], ["layout","triangle","a_transpose","diagonal"], ["a"], ["x"], [an,xn], [], "", "Solves a banded triangular system of equations", "", [ald_k_one]), Routine(False, True, "2a", "tpsv", T, [S,D,C,Z], ["n"], ["layout","triangle","a_transpose","diagonal"], ["ap"], ["x"], [apn,xn], [], "", "Solves a packed triangular system of equations", "", []), # Level 2: matrix update diff --git a/src/clblast.cpp b/src/clblast.cpp index 68671e50..ef1cedf9 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -45,6 +45,7 @@ #include "routines/level2/xtrmv.hpp" #include "routines/level2/xtbmv.hpp" #include "routines/level2/xtpmv.hpp" +#include "routines/level2/xtrsv.hpp" #include "routines/level2/xger.hpp" #include "routines/level2/xgeru.hpp" #include "routines/level2/xgerc.hpp" @@ -1146,12 +1147,20 @@ template StatusCode PUBLIC_API Tpmv<half>(const Layout, const Triangle, const Tr // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV template <typename T> -StatusCode Trsv(const Layout, const Triangle, const Transpose, const Diagonal, - const size_t, - const cl_mem, const size_t, const size_t, - cl_mem, const size_t, const size_t, - cl_command_queue*, cl_event*) { - return StatusCode::kNotImplemented; +StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + try { + auto queue_cpp = Queue(*queue); + auto routine = Xtrsv<T>(queue_cpp, event); + routine.DoTrsv(layout, triangle, a_transpose, diagonal, + n, + Buffer<T>(a_buffer), a_offset, a_ld, + Buffer<T>(x_buffer), x_offset, x_inc); + return StatusCode::kSuccess; + } catch (...) { return DispatchException(); } } template StatusCode PUBLIC_API Trsv<float>(const Layout, const Triangle, const Transpose, const Diagonal, const size_t, diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp new file mode 100644 index 00000000..d5d5a7ca --- /dev/null +++ b/src/routines/level2/xtrsv.cpp @@ -0,0 +1,66 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtrsv class (see the header for information about the class). +// +// ================================================================================================= + +#include "routines/level2/xtrsv.hpp" + +#include <string> +#include <vector> + +namespace clblast { +// ================================================================================================= + +// Constructor: forwards to base class constructor +template <typename T> +Xtrsv<T>::Xtrsv(Queue &queue, EventPointer event, const std::string &name): + Xgemv<T>(queue, event, name) { +} + +// ================================================================================================= + +// The main routine +template <typename T> +void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) { + + // Makes sure all dimensions are larger than zero + if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); } + + // Tests the matrix and vector + TestMatrixA(n, n, a_buffer, a_offset, a_ld); + TestVectorX(n, x_buffer, x_offset, x_inc); + + // Creates a copy of X: a temporary scratch buffer + auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset); + x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer); + + // The data is either in the upper or lower triangle + size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) || + (triangle == Triangle::kLower && layout == Layout::kRowMajor)); + + // TODO: Implement the routine +} + +// ================================================================================================= + +// Compiles the templated class +template class Xtrsv<half>; +template class Xtrsv<float>; +template class Xtrsv<double>; +template class Xtrsv<float2>; +template class Xtrsv<double2>; + +// ================================================================================================= +} // namespace clblast diff --git a/src/routines/level2/xtrsv.hpp b/src/routines/level2/xtrsv.hpp new file mode 100644 index 00000000..4a73b5eb --- /dev/null +++ b/src/routines/level2/xtrsv.hpp @@ -0,0 +1,47 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xtrsv routine. +// +// ================================================================================================= + +#ifndef CLBLAST_ROUTINES_XTRSV_H_ +#define CLBLAST_ROUTINES_XTRSV_H_ + +#include "routines/level2/xgemv.hpp" + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class Xtrsv: public Xgemv<T> { + public: + + // Uses the generic matrix-vector routine + using Xgemv<T>::queue_; + using Xgemv<T>::context_; + using Xgemv<T>::MatVec; + + // Constructor + Xtrsv(Queue &queue, EventPointer event, const std::string &name = "TRSV"); + + // Templated-precision implementation of the routine + void DoTrsv(const Layout layout, const Triangle triangle, + const Transpose a_transpose, const Diagonal diagonal, + const size_t n, + const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld, + const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc); +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_ROUTINES_XTRSV_H_ +#endif diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp new file mode 100644 index 00000000..67094b3d --- /dev/null +++ b/test/routines/level2/xtrsv.hpp @@ -0,0 +1,151 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements a class with static methods to describe the Xtrsv routine. Examples of +// such 'descriptions' are how to calculate the size a of buffer or how to run the routine. These +// static methods are used by the correctness tester and the performance tester. +// +// ================================================================================================= + +#ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_ +#define CLBLAST_TEST_ROUTINES_XTRSV_H_ + +#include <vector> +#include <string> + +#ifdef CLBLAST_REF_CLBLAS + #include "test/wrapper_clblas.hpp" +#endif +#ifdef CLBLAST_REF_CBLAS + #include "test/wrapper_cblas.hpp" +#endif + +namespace clblast { +// ================================================================================================= + +// See comment at top of file for a description of the class +template <typename T> +class TestXtrsv { + public: + + // The BLAS level: 1, 2, or 3 + static size_t BLASLevel() { return 2; } + + // The list of arguments relevant for this routine + static std::vector<std::string> GetOptions() { + return {kArgN, + kArgLayout, kArgTriangle, kArgATransp, kArgDiagonal, + kArgALeadDim, kArgXInc, + kArgAOffset, kArgXOffset}; + } + + // Describes how to obtain the sizes of the buffers + static size_t GetSizeX(const Arguments<T> &args) { + return args.n * args.x_inc + args.x_offset; + } + static size_t GetSizeA(const Arguments<T> &args) { + return args.n * args.a_ld + args.a_offset; + } + + // Describes how to set the sizes of all the buffers + static void SetSizes(Arguments<T> &args) { + args.a_size = GetSizeA(args); + args.x_size = GetSizeX(args); + } + + // Describes what the default values of the leading dimensions of the matrices are + static size_t DefaultLDA(const Arguments<T> &args) { return args.n; } + static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine + + // Describes which transpose options are relevant for this routine + using Transposes = std::vector<Transpose>; + static Transposes GetATransposes(const Transposes &all) { return all; } + static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine + + // Describes how to run the CLBlast routine + static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Trsv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal, + args.n, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.x_vec(), args.x_offset, args.x_inc, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + return status; + } + + // Describes how to run the clBLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CLBLAS + static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXtrsv<T>(convertToCLBLAS(args.layout), + convertToCLBLAS(args.triangle), + convertToCLBLAS(args.a_transpose), + convertToCLBLAS(args.diagonal), + args.n, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.x_vec, args.x_offset, args.x_inc, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + return static_cast<StatusCode>(status); + } + #endif + + // Describes how to run the CPU BLAS routine (for correctness/performance comparison) + #ifdef CLBLAST_REF_CBLAS + static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0)); + std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0)); + buffers.a_mat.Read(queue, args.a_size, a_mat_cpu); + buffers.x_vec.Read(queue, args.x_size, x_vec_cpu); + cblasXtrsv(convertToCBLAS(args.layout), + convertToCBLAS(args.triangle), + convertToCBLAS(args.a_transpose), + convertToCBLAS(args.diagonal), + args.n, + a_mat_cpu, args.a_offset, args.a_ld, + x_vec_cpu, args.x_offset, args.x_inc); + buffers.x_vec.Write(queue, args.x_size, x_vec_cpu); + return StatusCode::kSuccess; + } + #endif + + // Describes how to download the results of the computation (more importantly: which buffer) + static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { + std::vector<T> result(args.x_size, static_cast<T>(0)); + buffers.x_vec.Read(queue, args.x_size, result); + return result; + } + + // Describes how to compute the indices of the result buffer + static size_t ResultID1(const Arguments<T> &args) { + return args.n; + } + static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine + static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) { + return id1*args.x_inc + args.x_offset; + } + + // Describes how to compute performance metrics + static size_t GetFlops(const Arguments<T> &args) { + return 2 * args.n * args.n; + } + static size_t GetBytes(const Arguments<T> &args) { + return (args.n*args.n + 2*args.n + args.n) * sizeof(T); + } +}; + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_TEST_ROUTINES_XTRSV_H_ +#endif |