summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-02-25 15:30:57 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-02-25 15:30:57 +0100
commit 13dc26e63d11aa5760c46291aedf448cef1345f2 (patch)
tree 77ea56bbd1fa8e22da11d70b8f95b0a7e23edabe
parent 6710c609354958e81be422480a996ef6348b749a (diff)
Generated PyCLBlast docstrings
-rw-r--r-- scripts/generator/generator/pyclblast.py 15
-rw-r--r-- src/pyclblast/src/pyclblast.pyx 184
2 files changed, 197 insertions, 2 deletions
diff --git a/scripts/generator/generator/pyclblast.py b/scripts/generator/generator/pyclblast.py
index 8075d209..ab719f5e 100644
--- a/scripts/generator/generator/pyclblast.py
+++ b/scripts/generator/generator/pyclblast.py
@@ -44,6 +44,7 @@ def generate_pyx(routine):
result += SEPARATOR + NL
result += NL
+ # Reference C definition
result += "cdef extern from \"clblast_c.h\":" + NL
np_dtypes = []
for flavour in routine.flavours:
@@ -54,9 +55,18 @@ def generate_pyx(routine):
np_dtypes.append(to_np_dtype(flavour))
result += "" + NL
+ # Function definition
buffers = routine.inputs[:] + routine.outputs[:]
result += "def " + routine.plain_name() + "(queue, "
result += ", ".join(routine.arguments_python()) + "):" + NL
+
+ # Documentation
+ result += indent + "\"\"\"" + NL
+ result += indent + "x" + routine.upper_name() + ": " + routine.description + NL
+ result += indent + "\"\"\"" + NL
+ result += NL
+
+ # Data types and checks
result += indent + "dtype = check_dtype([" + ", ".join(buffers) + "], "
result += "[" + ", ".join(['"%s"' % d for d in np_dtypes]) + "])" + NL
for buf in buffers:
@@ -65,11 +75,12 @@ def generate_pyx(routine):
else:
result += indent + "check_matrix("
result += buf + ", \"" + buf + "\")" + NL
- result += "" + NL
+ result += NL
+ # Buffer transformation
for buf in buffers:
result += indent + "cdef cl_mem " + buf + "_buffer = <cl_mem><size_t>" + buf + ".base_data.int_ptr" + NL
- result += "" + NL
+ result += NL
result += indent + "cdef cl_command_queue command_queue = <cl_command_queue><size_t>queue.int_ptr" + NL
result += indent + "cdef cl_event event = NULL" + NL
diff --git a/src/pyclblast/src/pyclblast.pyx b/src/pyclblast/src/pyclblast.pyx
index 9529400c..860677fd 100644
--- a/src/pyclblast/src/pyclblast.pyx
+++ b/src/pyclblast/src/pyclblast.pyx
@@ -297,6 +297,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZswap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def swap(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
+ """
+ xSWAP: Swap two vectors
+ """
+
dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -333,6 +337,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZscal(const size_t n, const cl_double2 alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def scal(queue, n, x, x_inc = 1, alpha = 1.0, x_offset = 0):
+ """
+ xSCAL: Vector scaling
+ """
+
dtype = check_dtype([x], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
@@ -367,6 +375,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZcopy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def copy(queue, n, x, y, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0):
+ """
+ xCOPY: Vector copy
+ """
+
dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -403,6 +415,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZaxpy(const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def axpy(queue, n, x, y, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0):
+ """
+ xAXPY: Vector-times-constant plus vector
+ """
+
dtype = check_dtype([x, y], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -437,6 +453,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDdot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def dot(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ """
+ xDOT: Dot product of two vectors
+ """
+
dtype = check_dtype([x, y, dot], ["float32", "float64"])
check_vector(x, "x")
check_vector(y, "y")
@@ -469,6 +489,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZdotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def dotu(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ """
+ xDOTU: Dot product of two complex vectors
+ """
+
dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -501,6 +525,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZdotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def dotc(queue, n, x, y, dot, x_inc = 1, y_inc = 1, x_offset = 0, y_offset = 0, dot_offset = 0):
+ """
+ xDOTC: Dot product of two complex vectors, one conjugated
+ """
+
dtype = check_dtype([x, y, dot], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -535,6 +563,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDznrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def nrm2(queue, n, x, nrm2, x_inc = 1, x_offset = 0, nrm2_offset = 0):
+ """
+ xNRM2: Euclidean norm of a vector
+ """
+
dtype = check_dtype([x, nrm2], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(nrm2, "nrm2")
@@ -571,6 +603,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDzasum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def asum(queue, n, x, asum, x_inc = 1, x_offset = 0, asum_offset = 0):
+ """
+ xASUM: Absolute sum of values in a vector
+ """
+
dtype = check_dtype([x, asum], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(asum, "asum")
@@ -607,6 +643,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDzsum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def sum(queue, n, x, sum, x_inc = 1, x_offset = 0, sum_offset = 0):
+ """
+ xSUM: Sum of values in a vector (non-BLAS function)
+ """
+
dtype = check_dtype([x, sum], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(sum, "sum")
@@ -643,6 +683,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastiZamax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def amax(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
+ """
+ xAMAX: Index of absolute maximum value in a vector
+ """
+
dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(imax, "imax")
@@ -679,6 +723,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastiZamin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def amin(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
+ """
+ xAMIN: Index of absolute minimum value in a vector (non-BLAS function)
+ """
+
dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(imin, "imin")
@@ -715,6 +763,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastiZmax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def max(queue, n, x, imax, x_inc = 1, x_offset = 0, imax_offset = 0):
+ """
+ xMAX: Index of maximum value in a vector (non-BLAS function)
+ """
+
dtype = check_dtype([x, imax], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(imax, "imax")
@@ -751,6 +803,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastiZmin(const size_t n, cl_mem imin_buffer, const size_t imin_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def min(queue, n, x, imin, x_inc = 1, x_offset = 0, imin_offset = 0):
+ """
+ xMIN: Index of minimum value in a vector (non-BLAS function)
+ """
+
dtype = check_dtype([x, imin], ["float32", "float64", "complex64", "complex128"])
check_vector(x, "x")
check_matrix(imin, "imin")
@@ -787,6 +843,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZgemv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def gemv(queue, m, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xGEMV: General matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -826,6 +886,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZgbmv(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, const size_t kl, const size_t ku, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def gbmv(queue, m, n, kl, ku, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, a_transp = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xGBMV: General banded matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -863,6 +927,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhemv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def hemv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xHEMV: Hermitian matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -896,6 +964,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def hbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xHBMV: Hermitian banded matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -929,6 +1001,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_double2 beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def hpmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xHPMV: Hermitian packed matrix-vector multiplication
+ """
+
dtype = check_dtype([ap, x, y], ["complex64", "complex128"])
check_matrix(ap, "ap")
check_vector(x, "x")
@@ -962,6 +1038,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDsymv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def symv(queue, n, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xSYMV: Symmetric matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["float32", "float64"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -995,6 +1075,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDsbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def sbmv(queue, n, k, a, x, y, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, a_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xSBMV: Symmetric banded matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x, y], ["float32", "float64"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -1028,6 +1112,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDspmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem ap_buffer, const size_t ap_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const double beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,cl_command_queue* queue, cl_event* event)
def spmv(queue, n, ap, x, y, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, beta = 0.0, lower_triangle = False, ap_offset = 0, x_offset = 0, y_offset = 0):
+ """
+ xSPMV: Symmetric packed matrix-vector multiplication
+ """
+
dtype = check_dtype([ap, x, y], ["float32", "float64"])
check_matrix(ap, "ap")
check_vector(x, "x")
@@ -1063,6 +1151,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtrmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def trmv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ """
+ xTRMV: Triangular matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -1102,6 +1194,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtbmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def tbmv(queue, n, k, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ """
+ xTBMV: Triangular banded matrix-vector multiplication
+ """
+
dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -1141,6 +1237,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtpmv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def tpmv(queue, n, ap, x, ap_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, ap_offset = 0, x_offset = 0):
+ """
+ xTPMV: Triangular packed matrix-vector multiplication
+ """
+
dtype = check_dtype([ap, x], ["float32", "float64", "complex64", "complex128"])
check_matrix(ap, "ap")
check_vector(x, "x")
@@ -1180,6 +1280,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtrsv(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,cl_command_queue* queue, cl_event* event)
def trsv(queue, n, a, x, a_ld, x_inc = 1, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, x_offset = 0):
+ """
+ xTRSV: Solves a triangular system of equations
+ """
+
dtype = check_dtype([a, x], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_vector(x, "x")
@@ -1217,6 +1321,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDger(const CLBlastLayout layout, const size_t m, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def ger(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ """
+ xGER: General rank-1 matrix update
+ """
+
dtype = check_dtype([x, y, a], ["float32", "float64"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1249,6 +1357,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZgeru(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def geru(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ """
+ xGERU: General rank-1 complex matrix update
+ """
+
dtype = check_dtype([x, y, a], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1281,6 +1393,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZgerc(const CLBlastLayout layout, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def gerc(queue, m, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, x_offset = 0, y_offset = 0, a_offset = 0):
+ """
+ xGERC: General rank-1 complex conjugated matrix update
+ """
+
dtype = check_dtype([x, y, a], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1313,6 +1429,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZher(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def her(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0):
+ """
+ xHER: Hermitian rank-1 matrix update
+ """
+
dtype = check_dtype([x, a], ["complex64", "complex128"])
check_vector(x, "x")
check_matrix(a, "a")
@@ -1344,6 +1464,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhpr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
def hpr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0):
+ """
+ xHPR: Hermitian packed rank-1 matrix update
+ """
+
dtype = check_dtype([x, ap], ["complex64", "complex128"])
check_vector(x, "x")
check_matrix(ap, "ap")
@@ -1375,6 +1499,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZher2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def her2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0):
+ """
+ xHER2: Hermitian rank-2 matrix update
+ """
+
dtype = check_dtype([x, y, a], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1408,6 +1536,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhpr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const cl_double2 alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
def hpr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0):
+ """
+ xHPR2: Hermitian packed rank-2 matrix update
+ """
+
dtype = check_dtype([x, y, ap], ["complex64", "complex128"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1441,6 +1573,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDsyr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def syr(queue, n, x, a, a_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, a_offset = 0):
+ """
+ xSYR: Symmetric rank-1 matrix update
+ """
+
dtype = check_dtype([x, a], ["float32", "float64"])
check_vector(x, "x")
check_matrix(a, "a")
@@ -1472,6 +1608,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDspr(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
def spr(queue, n, x, ap, ap_ld, x_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, ap_offset = 0):
+ """
+ xSPR: Symmetric packed rank-1 matrix update
+ """
+
dtype = check_dtype([x, ap], ["float32", "float64"])
check_vector(x, "x")
check_matrix(ap, "ap")
@@ -1503,6 +1643,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDsyr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,cl_command_queue* queue, cl_event* event)
def syr2(queue, n, x, y, a, a_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, a_offset = 0):
+ """
+ xSYR2: Symmetric rank-2 matrix update
+ """
+
dtype = check_dtype([x, y, a], ["float32", "float64"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1536,6 +1680,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastDspr2(const CLBlastLayout layout, const CLBlastTriangle triangle, const size_t n, const double alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset,cl_command_queue* queue, cl_event* event)
def spr2(queue, n, x, y, ap, ap_ld, x_inc = 1, y_inc = 1, alpha = 1.0, lower_triangle = False, x_offset = 0, y_offset = 0, ap_offset = 0):
+ """
+ xSPR2: Symmetric packed rank-2 matrix update
+ """
+
dtype = check_dtype([x, y, ap], ["float32", "float64"])
check_vector(x, "x")
check_vector(y, "y")
@@ -1571,6 +1719,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZgemm(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, const size_t m, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def gemm(queue, m, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, a_transp = False, b_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ """
+ xGEMM: General matrix-matrix multiplication
+ """
+
dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1611,6 +1763,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZsymm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def symm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ """
+ xSYMM: Symmetric matrix-matrix multiplication
+ """
+
dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1649,6 +1805,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZhemm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def hemm(queue, m, n, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, right_side = False, lower_triangle = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ """
+ xHEMM: Hermitian matrix-matrix multiplication
+ """
+
dtype = check_dtype([a, b, c], ["complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1685,6 +1845,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZsyrk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def syrk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
+ """
+ xSYRK: Rank-K update of a symmetric matrix
+ """
+
dtype = check_dtype([a, c], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(c, "c")
@@ -1721,6 +1885,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZherk(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const size_t n, const size_t k, const double alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def herk(queue, n, k, a, c, a_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, a_transp = False, a_offset = 0, c_offset = 0):
+ """
+ xHERK: Rank-K update of a Hermitian matrix
+ """
+
dtype = check_dtype([a, c], ["complex64", "complex128"])
check_matrix(a, "a")
check_matrix(c, "c")
@@ -1755,6 +1923,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZsyr2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def syr2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ """
+ xSYR2K: Rank-2K update of a symmetric matrix
+ """
+
dtype = check_dtype([a, b, c], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1793,6 +1965,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZher2k(const CLBlastLayout layout, const CLBlastTriangle triangle, const CLBlastTranspose ab_transpose, const size_t n, const size_t k, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,cl_command_queue* queue, cl_event* event)
def her2k(queue, n, k, a, b, c, a_ld, b_ld, c_ld, alpha = 1.0, beta = 0.0, lower_triangle = False, ab_transp = False, a_offset = 0, b_offset = 0, c_offset = 0):
+ """
+ xHER2K: Rank-2K update of a Hermitian matrix
+ """
+
dtype = check_dtype([a, b, c], ["complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1829,6 +2005,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtrmm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
def trmm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
+ """
+ xTRMM: Triangular matrix-matrix multiplication
+ """
+
dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")
@@ -1869,6 +2049,10 @@ cdef extern from "clblast_c.h":
CLBlastStatusCode CLBlastZtrsm(const CLBlastLayout layout, const CLBlastSide side, const CLBlastTriangle triangle, const CLBlastTranspose a_transpose, const CLBlastDiagonal diagonal, const size_t m, const size_t n, const cl_double2 alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,cl_command_queue* queue, cl_event* event)
def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_triangle = False, a_transp = False, unit_diagonal = False, a_offset = 0, b_offset = 0):
+ """
+ xTRSM: Solves a triangular system of equations
+ """
+
dtype = check_dtype([a, b], ["float32", "float64", "complex64", "complex128"])
check_matrix(a, "a")
check_matrix(b, "b")