Added prototype for the TRSV routine

author: Cedric Nugteren <web@cedricnugteren.nl> 2017-01-20 11:30:32 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-01-20 11:30:32 +0100
commit: a5fd2323b6d9ce793f12618951012fcfec257b95 (patch)
tree: ea6064784185f63cc0d324f287183ea6f1ca5c90
parent: a2c0a9c5514e7cb9dbf9674843ba806b459d3544 (diff)
7 files changed, 339 insertions, 9 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a9cabac7..41982b21 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -156,7 +156,7 @@ if(NETLIB)
   set(SAMPLE_PROGRAMS_C ${SAMPLE_PROGRAMS_C} sgemm_netlib)
 endif()
 set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)
-set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv
+set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv
                     xger xgeru xgerc xher xhpr xher2 xhpr2 xsyr xspr xsyr2 xspr2)
 set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm xtrsm)
 set(LEVELX_ROUTINES xomatcopy xinvert)
diff --git a/doc/clblast.md b/doc/clblast.md
index d7be0005..d90cb61b 100644
--- a/doc/clblast.md
+++ b/doc/clblast.md
@@ -1445,6 +1445,63 @@ Arguments to TPMV:
 
 
 
+xTRSV: Solves a triangular system of equations
+-------------
+
+
+
+C++ API:
+```
+template <typename T>
+StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+                const size_t n,
+                const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                cl_command_queue* queue, cl_event* event)
+```
+
+C API:
+```
+CLBlastStatusCode CLBlastStrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
+                               const size_t n,
+                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                               cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastDtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
+                               const size_t n,
+                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                               cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastCtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
+                               const size_t n,
+                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                               cl_command_queue* queue, cl_event* event)
+CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal,
+                               const size_t n,
+                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                               cl_command_queue* queue, cl_event* event)
+```
+
+Arguments to TRSV:
+
+* `const Layout layout`: Data-layout of the matrices, either `Layout::kRowMajor` (101) for row-major layout or `Layout::kColMajor` (102) for column-major data-layout.
+* `const Triangle triangle`: The part of the array of the triangular matrix to be used, either `Triangle::kUpper` (121) or `Triangle::kLower` (122).
+* `const Transpose a_transpose`: Transposing the input matrix A, either `Transpose::kNo` (111), `Transpose::kYes` (112), or `Transpose::kConjugate` (113) for a complex-conjugate transpose.
+* `const Diagonal diagonal`: The property of the diagonal matrix, either `Diagonal::kNonUnit` (131) for non-unit values on the diagonal or `Diagonal::kUnit` (132) for unit values on the diagonal.
+* `const size_t n`: Integer size argument. This value must be positive.
+* `const cl_mem a_buffer`: OpenCL buffer to store the input A matrix.
+* `const size_t a_offset`: The offset in elements from the start of the input A matrix.
+* `const size_t a_ld`: Leading dimension of the input A matrix. This value must be greater than 0.
+* `cl_mem x_buffer`: OpenCL buffer to store the output x vector.
+* `const size_t x_offset`: The offset in elements from the start of the output x vector.
+* `const size_t x_inc`: Stride/increment of the output x vector. This value must be greater than 0.
+* `cl_command_queue* queue`: Pointer to an OpenCL command queue associated with a context and device to execute the routine on.
+* `cl_event* event`: Pointer to an OpenCL event to be able to wait for completion of the routine's OpenCL kernel(s). This is an optional argument.
+
+
+
 xGER: General rank-1 matrix update
 -------------
 
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index d71e392d..1bd0b58e 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -41,7 +41,7 @@ FILES = [
     "/include/clblast_netlib_c.h",
     "/src/clblast_netlib_c.cpp",
 ]
-HEADER_LINES = [117, 74, 118, 22, 29, 41, 65, 32]
+HEADER_LINES = [117, 75, 118, 22, 29, 41, 65, 32]
 FOOTER_LINES = [17, 80, 19, 18, 6, 6, 9, 2]
 
 # Different possibilities for requirements
@@ -129,7 +129,7 @@ ROUTINES = [
   Routine(True,  True,  "2a", "trmv",  T,  [S,D,C,Z,H],    ["n"],               ["layout","triangle","a_transpose","diagonal"],         ["a"],      ["x"],                        [an,xn],       [],               "n",   "Triangular matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is triangular instead.", [ald_n]),
   Routine(True,  True,  "2a", "tbmv",  T,  [S,D,C,Z,H],    ["n","k"],           ["layout","triangle","a_transpose","diagonal"],         ["a"],      ["x"],                        [an,xn],       [],               "n",   "Triangular banded matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is triangular and banded instead.", [ald_k_one]),
   Routine(True,  True,  "2a", "tpmv",  T,  [S,D,C,Z,H],    ["n"],               ["layout","triangle","a_transpose","diagonal"],         ["ap"],     ["x"],                        [apn,xn],      [],               "n",   "Triangular packed matrix-vector multiplication", "Same operation as xGEMV, but matrix _A_ is a triangular packed matrix instead and repreented as _AP_.", []),
-  Routine(False, True,  "2a", "trsv",  T,  [S,D,C,Z],      ["n"],               ["layout","triangle","a_transpose","diagonal"],         ["a"],      ["x"],                        [an,xn],       [],               "",    "Solves a triangular system of equations", "", []),
+  Routine(True,  True,  "2a", "trsv",  T,  [S,D,C,Z],      ["n"],               ["layout","triangle","a_transpose","diagonal"],         ["a"],      ["x"],                        [an,xn],       [],               "",    "Solves a triangular system of equations", "", []),
   Routine(False, True,  "2a", "tbsv",  T,  [S,D,C,Z],      ["n","k"],           ["layout","triangle","a_transpose","diagonal"],         ["a"],      ["x"],                        [an,xn],       [],               "",    "Solves a banded triangular system of equations", "", [ald_k_one]),
   Routine(False, True,  "2a", "tpsv",  T,  [S,D,C,Z],      ["n"],               ["layout","triangle","a_transpose","diagonal"],         ["ap"],     ["x"],                        [apn,xn],      [],               "",    "Solves a packed triangular system of equations", "", []),
   # Level 2: matrix update
diff --git a/src/clblast.cpp b/src/clblast.cpp
index 68671e50..ef1cedf9 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -45,6 +45,7 @@
 #include "routines/level2/xtrmv.hpp"
 #include "routines/level2/xtbmv.hpp"
 #include "routines/level2/xtpmv.hpp"
+#include "routines/level2/xtrsv.hpp"
 #include "routines/level2/xger.hpp"
 #include "routines/level2/xgeru.hpp"
 #include "routines/level2/xgerc.hpp"
@@ -1146,12 +1147,20 @@ template StatusCode PUBLIC_API Tpmv<half>(const Layout, const Triangle, const Tr
 
 // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
 template <typename T>
-StatusCode Trsv(const Layout, const Triangle, const Transpose, const Diagonal,
-                const size_t,
-                const cl_mem, const size_t, const size_t,
-                cl_mem, const size_t, const size_t,
-                cl_command_queue*, cl_event*) {
-  return StatusCode::kNotImplemented;
+StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+                const size_t n,
+                const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+                cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+                cl_command_queue* queue, cl_event* event) {
+  try {
+    auto queue_cpp = Queue(*queue);
+    auto routine = Xtrsv<T>(queue_cpp, event);
+    routine.DoTrsv(layout, triangle, a_transpose, diagonal,
+                   n,
+                   Buffer<T>(a_buffer), a_offset, a_ld,
+                   Buffer<T>(x_buffer), x_offset, x_inc);
+    return StatusCode::kSuccess;
+  } catch (...) { return DispatchException(); }
 }
 template StatusCode PUBLIC_API Trsv<float>(const Layout, const Triangle, const Transpose, const Diagonal,
                                            const size_t,
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
new file mode 100644
index 00000000..d5d5a7ca
--- /dev/null
+++ b/src/routines/level2/xtrsv.cpp
@@ -0,0 +1,66 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xtrsv class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "routines/level2/xtrsv.hpp"
+
+#include <string>
+#include <vector>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: forwards to base class constructor
+template <typename T>
+Xtrsv<T>::Xtrsv(Queue &queue, EventPointer event, const std::string &name):
+    Xgemv<T>(queue, event, name) {
+}
+
+// =================================================================================================
+
+// The main routine
+template <typename T>
+void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
+                      const Transpose a_transpose, const Diagonal diagonal,
+                      const size_t n,
+                      const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+                      const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+
+  // Makes sure the dimension is larger than zero: throws a BLASError otherwise
+  if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
+
+  // Tests the n-by-n matrix A and the n-element vector X
+  TestMatrixA(n, n, a_buffer, a_offset, a_ld);
+  TestVectorX(n, x_buffer, x_offset, x_inc);
+
+  // Creates a copy of X (including its leading offset): a temporary scratch buffer
+  auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
+  x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
+
+  // True for an upper triangle in col-major layout or for a lower triangle in row-major layout
+  const bool is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
+                         (triangle == Triangle::kLower && layout == Layout::kRowMajor));
+
+  // TODO: Implement the routine ('a_transpose', 'diagonal' and 'is_upper' are not used yet)
+}
+
+// =================================================================================================
+
+// Compiles the templated class
+template class Xtrsv<half>;
+template class Xtrsv<float>;
+template class Xtrsv<double>;
+template class Xtrsv<float2>;
+template class Xtrsv<double2>;
+
+// =================================================================================================
+} // namespace clblast
diff --git a/src/routines/level2/xtrsv.hpp b/src/routines/level2/xtrsv.hpp
new file mode 100644
index 00000000..4a73b5eb
--- /dev/null
+++ b/src/routines/level2/xtrsv.hpp
@@ -0,0 +1,47 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xtrsv routine.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XTRSV_H_
+#define CLBLAST_ROUTINES_XTRSV_H_
+
+#include "routines/level2/xgemv.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// See comment at top of file for a description of the class
+template <typename T>
+class Xtrsv: public Xgemv<T> {
+ public:
+
+  // Uses the generic matrix-vector routine
+  using Xgemv<T>::queue_;
+  using Xgemv<T>::context_;
+  using Xgemv<T>::MatVec;
+
+  // Constructor
+  Xtrsv(Queue &queue, EventPointer event, const std::string &name = "TRSV");
+
+  // Templated-precision implementation of the routine
+  void DoTrsv(const Layout layout, const Triangle triangle,
+              const Transpose a_transpose, const Diagonal diagonal,
+              const size_t n,
+              const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+              const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XTRSV_H_
+#endif
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
new file mode 100644
index 00000000..67094b3d
--- /dev/null
+++ b/test/routines/level2/xtrsv.hpp
@@ -0,0 +1,151 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements a class with static methods to describe the Xtrsv routine. Examples of
+// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
+// static methods are used by the correctness tester and the performance tester.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_TEST_ROUTINES_XTRSV_H_
+#define CLBLAST_TEST_ROUTINES_XTRSV_H_
+
+#include <vector>
+#include <string>
+
+#ifdef CLBLAST_REF_CLBLAS
+  #include "test/wrapper_clblas.hpp"
+#endif
+#ifdef CLBLAST_REF_CBLAS
+  #include "test/wrapper_cblas.hpp"
+#endif
+
+namespace clblast {
+// =================================================================================================
+
+// See comment at top of file for a description of the class
+template <typename T>
+class TestXtrsv {
+ public:
+
+  // The BLAS level: 1, 2, or 3
+  static size_t BLASLevel() { return 2; }
+
+  // The list of arguments relevant for this routine
+  static std::vector<std::string> GetOptions() {
+    return {kArgN,
+            kArgLayout, kArgTriangle, kArgATransp, kArgDiagonal,
+            kArgALeadDim, kArgXInc,
+            kArgAOffset, kArgXOffset};
+  }
+
+  // Describes how to obtain the sizes of the buffers
+  static size_t GetSizeX(const Arguments<T> &args) {
+    return args.n * args.x_inc + args.x_offset;
+  }
+  static size_t GetSizeA(const Arguments<T> &args) {
+    return args.n * args.a_ld + args.a_offset;
+  }
+
+  // Describes how to set the sizes of all the buffers
+  static void SetSizes(Arguments<T> &args) {
+    args.a_size = GetSizeA(args);
+    args.x_size = GetSizeX(args);
+  }
+
+  // Describes what the default values of the leading dimensions of the matrices are
+  static size_t DefaultLDA(const Arguments<T> &args) { return args.n; }
+  static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine
+  static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine
+
+  // Describes which transpose options are relevant for this routine
+  using Transposes = std::vector<Transpose>;
+  static Transposes GetATransposes(const Transposes &all) { return all; }
+  static Transposes GetBTransposes(const Transposes &) { return {}; } // N/A for this routine
+
+  // Describes how to run the CLBlast routine
+  static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+    auto queue_plain = queue();
+    auto event = cl_event{};
+    auto status = Trsv<T>(args.layout, args.triangle, args.a_transpose, args.diagonal,
+                          args.n,
+                          buffers.a_mat(), args.a_offset, args.a_ld,
+                          buffers.x_vec(), args.x_offset, args.x_inc,
+                          &queue_plain, &event);
+    if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); }
+    return status;
+  }
+
+  // Describes how to run the clBLAS routine (for correctness/performance comparison)
+  #ifdef CLBLAST_REF_CLBLAS
+    static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+      auto queue_plain = queue();
+      auto event = cl_event{};
+      auto status = clblasXtrsv<T>(convertToCLBLAS(args.layout),
+                                   convertToCLBLAS(args.triangle),
+                                   convertToCLBLAS(args.a_transpose),
+                                   convertToCLBLAS(args.diagonal),
+                                   args.n,
+                                   buffers.a_mat, args.a_offset, args.a_ld,
+                                   buffers.x_vec, args.x_offset, args.x_inc,
+                                   1, &queue_plain, 0, nullptr, &event);
+      clWaitForEvents(1, &event);
+      return static_cast<StatusCode>(status);
+    }
+  #endif
+
+  // Describes how to run the CPU BLAS routine (for correctness/performance comparison)
+  #ifdef CLBLAST_REF_CBLAS
+    static StatusCode RunReference2(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+      std::vector<T> a_mat_cpu(args.a_size, static_cast<T>(0));
+      std::vector<T> x_vec_cpu(args.x_size, static_cast<T>(0));
+      buffers.a_mat.Read(queue, args.a_size, a_mat_cpu);
+      buffers.x_vec.Read(queue, args.x_size, x_vec_cpu);
+      cblasXtrsv(convertToCBLAS(args.layout),
+                 convertToCBLAS(args.triangle),
+                 convertToCBLAS(args.a_transpose),
+                 convertToCBLAS(args.diagonal),
+                 args.n,
+                 a_mat_cpu, args.a_offset, args.a_ld,
+                 x_vec_cpu, args.x_offset, args.x_inc);
+      buffers.x_vec.Write(queue, args.x_size, x_vec_cpu);
+      return StatusCode::kSuccess;
+    }
+  #endif
+
+  // Describes how to download the results of the computation (more importantly: which buffer)
+  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) {
+    std::vector<T> result(args.x_size, static_cast<T>(0));
+    buffers.x_vec.Read(queue, args.x_size, result);
+    return result;
+  }
+
+  // Describes how to compute the indices of the result buffer
+  static size_t ResultID1(const Arguments<T> &args) {
+    return args.n;
+  }
+  static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
+  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
+    return id1*args.x_inc + args.x_offset;
+  }
+
+  // Describes how to compute performance metrics
+  static size_t GetFlops(const Arguments<T> &args) {
+    return 2 * args.n * args.n;
+  }
+  static size_t GetBytes(const Arguments<T> &args) {
+    return (args.n*args.n + 2*args.n + args.n) * sizeof(T);
+  }
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_TEST_ROUTINES_XTRSV_H_
+#endif
author	Cedric Nugteren <web@cedricnugteren.nl>	2017-01-20 11:30:32 +0100
committer	Cedric Nugteren <web@cedricnugteren.nl>	2017-01-20 11:30:32 +0100
commit	a5fd2323b6d9ce793f12618951012fcfec257b95 (patch)
tree	ea6064784185f63cc0d324f287183ea6f1ca5c90
parent	a2c0a9c5514e7cb9dbf9674843ba806b459d3544 (diff)